1497 * constexpr double kappa = 1e-6;
1498 * constexpr double reference_density = 3300;
1499 * constexpr double reference_temperature = 293;
1500 * constexpr double expansion_coefficient = 2e-5;
1501 * constexpr double specific_heat = 1250;
1502 * constexpr double radiogenic_heating = 7.4e-12;
1505 * constexpr double R0 = 6371000. - 2890000.; // smaller radius; r = R0 maps to s = 0 in TemperatureInitialValues::value()
1506 * constexpr double R1 = 6371000. - 35000.; // larger radius; r = R1 maps to s = 1 in TemperatureInitialValues::value()
1508 * constexpr double T0 = 4000 + 273; // initial temperature where tau = 0; presumably 4000 degrees Celsius in Kelvin -- confirm
1509 * constexpr double T1 = 700 + 273; // initial temperature where tau = 1; presumably 700 degrees Celsius in Kelvin -- confirm
1514 * The next set of definitions are for functions that encode the density
1515 * as a function of temperature, the gravity vector, and the initial
1516 * values for the temperature. Again, all of these (along with the values
1517 * they compute) are discussed in the introduction:
1520 *
double density(
const double temperature)
1523 * reference_density *
1524 * (1 - expansion_coefficient * (temperature - reference_temperature)));
1528 *
template <
int dim>
1531 *
const double r = p.norm();
1532 *
return -(1.245e-6 * r + 7.714e13 / r / r) * p / r;
1537 *
template <
int dim>
1538 *
class TemperatureInitialValues :
public Function<dim>
1541 * TemperatureInitialValues()
1542 * : Function<dim>(1)
1545 *
virtual double value(
const Point<dim> &p,
1546 *
const unsigned int component = 0)
const override;
1549 * Vector<double> &
value)
const override;
1554 *
template <
int dim>
1555 *
double TemperatureInitialValues<dim>::value(
const Point<dim> &p,
1556 *
const unsigned int)
const
1558 *
const double r = p.norm();
1559 *
const double h = R1 - R0;
1561 *
const double s = (r - R0) / h;
1564 *
const double phi = std::atan2(p[0], p[1]);
1565 *
const double tau = s + 0.2 * s * (1 - s) *
std::sin(6 * phi) * q;
1567 *
return T0 * (1.0 - tau) + T1 * tau;
1571 *
template <
int dim>
1573 * TemperatureInitialValues<dim>::vector_value(
const Point<dim> &p,
1576 *
for (
unsigned int c = 0; c < this->n_components; ++c)
1577 * values(c) = TemperatureInitialValues<dim>::value(p, c);
1583 * As mentioned in the introduction we need to rescale the pressure to
1584 * avoid the relative ill-conditioning of the momentum and mass
1585 * conservation equations. The scaling factor is @f$\frac{\eta}{L}@f$ where
1586 * @f$L@f$ is a typical length scale. By experimenting it turns out that a
1587 * good length scale is the diameter of plumes, which is around 10 km:
1590 *
constexpr double pressure_scaling = eta / 10000;
1594 * The final number in this namespace is a constant that denotes the
1595 * number of seconds per (average, tropical) year. We use this only when
1596 * generating screen output: internally, all computations of this program
1597 * happen in SI units (kilogram, meter, seconds) but writing geological
1598 * times in seconds yields numbers that one can't relate to reality, and
1599 * so we convert to years using the factor defined here:
1602 * const double year_in_seconds = 60 * 60 * 24 * 365.2425; // s/min * min/h * h/day * days/year
1604 * } // namespace EquationData
1611 * <a name="step_32-PreconditioningtheStokessystem"></a>
1612 * <h3>Preconditioning the Stokes system</h3>
1616 * This namespace implements the preconditioner. As discussed in the
1617 * introduction, this preconditioner differs in a number of key portions
1618 * from the one used in @ref step_31 "step-31". Specifically, it is a right preconditioner,
1619 * implementing the matrix
1620 * @f{align*}{
1621 * \left(\begin{array}{cc}A^{-1} & A^{-1}B^TS^{-1}
1622 * \\0 & -S^{-1}
1623 * \end{array}\right)
1624 * @f}
1625 * where the two inverse matrix operations
1626 * are approximated by linear solvers or, if the right flag is given to the
1627 * constructor of this class, by a single AMG V-cycle for the velocity
1628 * block. The three code blocks of the <code>vmult</code> function implement
1629 * the multiplications with the three blocks of this preconditioner matrix
1630 * and should be self explanatory if you have read through @ref step_31 "step-31" or the
1631 * discussion of composing solvers in @ref step_20 "step-20".
1634 * namespace LinearSolvers
1636 * template <class PreconditionerTypeA, class PreconditionerTypeMp>
1637 * class BlockSchurPreconditioner : public EnableObserverPointer
1640 * BlockSchurPreconditioner(const TrilinosWrappers::BlockSparseMatrix &S,
1641 * const TrilinosWrappers::BlockSparseMatrix &Spre,
1642 * const PreconditionerTypeMp &Mppreconditioner,
1643 * const PreconditionerTypeA &Apreconditioner,
1644 * const bool do_solve_A)
1645 * : stokes_matrix(&S)
1646 * , stokes_preconditioner_matrix(&Spre)
1647 * , mp_preconditioner(Mppreconditioner)
1648 * , a_preconditioner(Apreconditioner)
1649 * , do_solve_A(do_solve_A)
1652 * void vmult(TrilinosWrappers::MPI::BlockVector &dst,
1653 * const TrilinosWrappers::MPI::BlockVector &src) const
1655 * TrilinosWrappers::MPI::Vector utmp(src.block(0));
1658 * SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
1660 * SolverCG<TrilinosWrappers::MPI::Vector> solver(solver_control);
1662 * solver.solve(stokes_preconditioner_matrix->block(1, 1),
1665 * mp_preconditioner);
1667 * dst.block(1) *= -1.0;
1671 * stokes_matrix->block(0, 1).vmult(utmp, dst.block(1));
1673 * utmp.add(src.block(0));
1676 * if (do_solve_A == true)
1678 * SolverControl solver_control(5000, utmp.l2_norm() * 1e-2);
1679 * TrilinosWrappers::SolverCG solver(solver_control);
1680 * solver.solve(stokes_matrix->block(0, 0),
1683 * a_preconditioner);
1686 * a_preconditioner.vmult(dst.block(0), utmp);
1690 * const ObserverPointer<const TrilinosWrappers::BlockSparseMatrix>
1692 * const ObserverPointer<const TrilinosWrappers::BlockSparseMatrix>
1693 * stokes_preconditioner_matrix;
1694 * const PreconditionerTypeMp &mp_preconditioner;
1695 * const PreconditionerTypeA &a_preconditioner;
1696 * const bool do_solve_A;
1698 * } // namespace LinearSolvers
1705 * <a name="step_32-Definitionofassemblydatastructures"></a>
1706 * <h3>Definition of assembly data structures</h3>
1710 * As described in the introduction, we will use the WorkStream mechanism
1711 * discussed in the @ref threads topic to parallelize operations among the
1712 * processors of a single machine. The WorkStream class requires that data
1713 * is passed around in two kinds of data structures, one for scratch data
1714 * and one to pass data from the assembly function to the function that
1715 * copies local contributions into global objects.
1719 * The following namespace (and the two sub-namespaces) contains a
1720 * collection of data structures that serve this purpose, one pair for each
1721 * of the four operations discussed in the introduction that we will want to
1722 * parallelize. Each assembly routine gets two sets of data: a Scratch array
1723 * that collects all the classes and arrays that are used for the
1724 * calculation of the cell contribution, and a CopyData array that keeps
1725 * local matrices and vectors which will be written into the global
1726 * matrix. Whereas CopyData is a container for the final data that is
1727 * written into the global matrices and vector (and, thus, absolutely
1728 * necessary), the Scratch arrays are merely there for performance reasons
1729 * — it would be much more expensive to set up a FEValues object on
1730 * each cell, than creating it only once and updating some derivative data.
1734 * @ref step_31 "step-31" had four assembly routines: One for the preconditioner matrix of
1735 * the Stokes system, one for the Stokes matrix and right hand side, one for
1736 * the temperature matrices and one for the right hand side of the
1737 * temperature equation. We here organize the scratch arrays and CopyData
1738 * objects for each of those four assembly components using a
1739 * <code>struct</code> environment (since we consider these as temporary
1740 * objects we pass around, rather than classes that implement functionality
1741 * of their own, though this is a more subjective point of view to
1742 * distinguish between <code>struct</code>s and <code>class</code>es).
1746 * Regarding the Scratch objects, each struct is equipped with a constructor
1747 * that creates an @ref FEValues object using the @ref FiniteElement,
1748 * Quadrature, @ref Mapping (which describes the interpolation of curved
1749 * boundaries), and @ref UpdateFlags instances. Moreover, we manually
1750 * implement a copy constructor (since the FEValues class is not copyable by
1751 * itself), and provide some additional vector fields that are used to hold
1752 * intermediate data during the computation of local contributions.
1756 * Let us start with the scratch arrays and, specifically, the one used for
1757 * assembly of the Stokes preconditioner:
1760 * namespace Assembly
1764 * template <int dim>
1765 * struct StokesPreconditioner
1767 * StokesPreconditioner(const FiniteElement<dim> &stokes_fe,
1768 * const Quadrature<dim> &stokes_quadrature,
1769 * const Mapping<dim> &mapping,
1770 * const UpdateFlags update_flags);
1772 * StokesPreconditioner(const StokesPreconditioner &data);
1775 * FEValues<dim> stokes_fe_values;
1777 * std::vector<Tensor<2, dim>> grad_phi_u;
1778 * std::vector<double> phi_p;
1781 * template <int dim>
1782 * StokesPreconditioner<dim>::StokesPreconditioner(
1783 * const FiniteElement<dim> &stokes_fe,
1784 * const Quadrature<dim> &stokes_quadrature,
1785 * const Mapping<dim> &mapping,
1786 * const UpdateFlags update_flags)
1787 * : stokes_fe_values(mapping, stokes_fe, stokes_quadrature, update_flags)
1788 * , grad_phi_u(stokes_fe.n_dofs_per_cell())
1789 * , phi_p(stokes_fe.n_dofs_per_cell())
1794 * template <int dim>
1795 * StokesPreconditioner<dim>::StokesPreconditioner(
1796 * const StokesPreconditioner &scratch)
1797 * : stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
1798 * scratch.stokes_fe_values.get_fe(),
1799 * scratch.stokes_fe_values.get_quadrature(),
1800 * scratch.stokes_fe_values.get_update_flags())
1801 * , grad_phi_u(scratch.grad_phi_u)
1802 * , phi_p(scratch.phi_p)
1809 * The next one is the scratch object used for the assembly of the full
1810 * Stokes system. Observe that we derive the StokesSystem scratch class
1811 * from the StokesPreconditioner class above. We do this because all the
1812 * objects that are necessary for the assembly of the preconditioner are
1813 * also needed for the actual matrix system and right hand side, plus
1814 * some extra data. This makes the program more compact. Note also that
1815 * the assembly of the Stokes system and the temperature right hand side
1816 * further down requires data from temperature and velocity,
1817 * respectively, so we actually need two FEValues objects for those two
1818 * cases.
1821 * template <int dim>
1822 * struct StokesSystem : public StokesPreconditioner<dim>
1824 * StokesSystem(const FiniteElement<dim> &stokes_fe,
1825 * const Mapping<dim> &mapping,
1826 * const Quadrature<dim> &stokes_quadrature,
1827 * const UpdateFlags stokes_update_flags,
1828 * const FiniteElement<dim> &temperature_fe,
1829 * const UpdateFlags temperature_update_flags);
1831 * StokesSystem(const StokesSystem<dim> &data);
1834 * FEValues<dim> temperature_fe_values;
1836 * std::vector<Tensor<1, dim>> phi_u;
1837 * std::vector<SymmetricTensor<2, dim>> grads_phi_u;
1838 * std::vector<double> div_phi_u;
1840 * std::vector<double> old_temperature_values;
1844 * template <int dim>
1845 * StokesSystem<dim>::StokesSystem(
1846 * const FiniteElement<dim> &stokes_fe,
1847 * const Mapping<dim> &mapping,
1848 * const Quadrature<dim> &stokes_quadrature,
1849 * const UpdateFlags stokes_update_flags,
1850 * const FiniteElement<dim> &temperature_fe,
1851 * const UpdateFlags temperature_update_flags)
1852 * : StokesPreconditioner<dim>(stokes_fe,
1853 * stokes_quadrature,
1855 * stokes_update_flags)
1856 * , temperature_fe_values(mapping,
1858 * stokes_quadrature,
1859 * temperature_update_flags)
1860 * , phi_u(stokes_fe.n_dofs_per_cell())
1861 * , grads_phi_u(stokes_fe.n_dofs_per_cell())
1862 * , div_phi_u(stokes_fe.n_dofs_per_cell())
1863 * , old_temperature_values(stokes_quadrature.size())
1867 * template <int dim>
1868 * StokesSystem<dim>::StokesSystem(const StokesSystem<dim> &scratch)
1869 * : StokesPreconditioner<dim>(scratch)
1870 * , temperature_fe_values(
1871 * scratch.temperature_fe_values.get_mapping(),
1872 * scratch.temperature_fe_values.get_fe(),
1873 * scratch.temperature_fe_values.get_quadrature(),
1874 * scratch.temperature_fe_values.get_update_flags())
1875 * , phi_u(scratch.phi_u)
1876 * , grads_phi_u(scratch.grads_phi_u)
1877 * , div_phi_u(scratch.div_phi_u)
1878 * , old_temperature_values(scratch.old_temperature_values)
1884 * After defining the objects used in the assembly of the Stokes system,
1885 * we do the same for the assembly of the matrices necessary for the
1886 * temperature system. The general structure is very similar:
1889 * template <int dim>
1890 * struct TemperatureMatrix
1892 * TemperatureMatrix(const FiniteElement<dim> &temperature_fe,
1893 * const Mapping<dim> &mapping,
1894 * const Quadrature<dim> &temperature_quadrature);
1896 * TemperatureMatrix(const TemperatureMatrix &data);
1899 * FEValues<dim> temperature_fe_values;
1901 * std::vector<double> phi_T;
1902 * std::vector<Tensor<1, dim>> grad_phi_T;
1906 * template <int dim>
1907 * TemperatureMatrix<dim>::TemperatureMatrix(
1908 * const FiniteElement<dim> &temperature_fe,
1909 * const Mapping<dim> &mapping,
1910 * const Quadrature<dim> &temperature_quadrature)
1911 * : temperature_fe_values(mapping,
1913 * temperature_quadrature,
1914 * update_values | update_gradients |
1915 * update_JxW_values)
1916 * , phi_T(temperature_fe.n_dofs_per_cell())
1917 * , grad_phi_T(temperature_fe.n_dofs_per_cell())
1921 * template <int dim>
1922 * TemperatureMatrix<dim>::TemperatureMatrix(
1923 * const TemperatureMatrix &scratch)
1924 * : temperature_fe_values(
1925 * scratch.temperature_fe_values.get_mapping(),
1926 * scratch.temperature_fe_values.get_fe(),
1927 * scratch.temperature_fe_values.get_quadrature(),
1928 * scratch.temperature_fe_values.get_update_flags())
1929 * , phi_T(scratch.phi_T)
1930 * , grad_phi_T(scratch.grad_phi_T)
1936 * The final scratch object is used in the assembly of the right hand
1937 * side of the temperature system. This object is significantly larger
1938 * than the ones above because a lot more quantities enter the
1939 * computation of the right hand side of the temperature equation. In
1940 * particular, the temperature values and gradients of the previous two
1941 * time steps need to be evaluated at the quadrature points, as well as
1942 * the velocities and the strain rates (i.e. the symmetric gradients of
1943 * the velocity) that enter the right hand side as friction heating
1944 * terms. Despite the number of terms, the following should be rather
1945 * self-explanatory:
1948 * template <int dim>
1949 * struct TemperatureRHS
1951 * TemperatureRHS(const FiniteElement<dim> &temperature_fe,
1952 * const FiniteElement<dim> &stokes_fe,
1953 * const Mapping<dim> &mapping,
1954 * const Quadrature<dim> &quadrature);
1956 * TemperatureRHS(const TemperatureRHS &data);
1959 * FEValues<dim> temperature_fe_values;
1960 * FEValues<dim> stokes_fe_values;
1962 * std::vector<double> phi_T;
1963 * std::vector<Tensor<1, dim>> grad_phi_T;
1965 * std::vector<Tensor<1, dim>> old_velocity_values;
1966 * std::vector<Tensor<1, dim>> old_old_velocity_values;
1968 * std::vector<SymmetricTensor<2, dim>> old_strain_rates;
1969 * std::vector<SymmetricTensor<2, dim>> old_old_strain_rates;
1971 * std::vector<double> old_temperature_values;
1972 * std::vector<double> old_old_temperature_values;
1973 * std::vector<Tensor<1, dim>> old_temperature_grads;
1974 * std::vector<Tensor<1, dim>> old_old_temperature_grads;
1975 * std::vector<double> old_temperature_laplacians;
1976 * std::vector<double> old_old_temperature_laplacians;
1980 * template <int dim>
1981 * TemperatureRHS<dim>::TemperatureRHS(
1982 * const FiniteElement<dim> &temperature_fe,
1983 * const FiniteElement<dim> &stokes_fe,
1984 * const Mapping<dim> &mapping,
1985 * const Quadrature<dim> &quadrature)
1986 * : temperature_fe_values(mapping,
1989 * update_values | update_gradients |
1990 * update_hessians | update_quadrature_points |
1991 * update_JxW_values)
1992 * , stokes_fe_values(mapping,
1995 * update_values | update_gradients)
1996 * , phi_T(temperature_fe.n_dofs_per_cell())
1997 * , grad_phi_T(temperature_fe.n_dofs_per_cell())
2000 * old_velocity_values(quadrature.size())
2001 * , old_old_velocity_values(quadrature.size())
2002 * , old_strain_rates(quadrature.size())
2003 * , old_old_strain_rates(quadrature.size())
2006 * old_temperature_values(quadrature.size())
2007 * , old_old_temperature_values(quadrature.size())
2008 * , old_temperature_grads(quadrature.size())
2009 * , old_old_temperature_grads(quadrature.size())
2010 * , old_temperature_laplacians(quadrature.size())
2011 * , old_old_temperature_laplacians(quadrature.size())
2015 * template <int dim>
2016 * TemperatureRHS<dim>::TemperatureRHS(const TemperatureRHS &scratch)
2017 * : temperature_fe_values(
2018 * scratch.temperature_fe_values.get_mapping(),
2019 * scratch.temperature_fe_values.get_fe(),
2020 * scratch.temperature_fe_values.get_quadrature(),
2021 * scratch.temperature_fe_values.get_update_flags())
2022 * , stokes_fe_values(scratch.stokes_fe_values.get_mapping(),
2023 * scratch.stokes_fe_values.get_fe(),
2024 * scratch.stokes_fe_values.get_quadrature(),
2025 * scratch.stokes_fe_values.get_update_flags())
2026 * , phi_T(scratch.phi_T)
2027 * , grad_phi_T(scratch.grad_phi_T)
2030 * old_velocity_values(scratch.old_velocity_values)
2031 * , old_old_velocity_values(scratch.old_old_velocity_values)
2032 * , old_strain_rates(scratch.old_strain_rates)
2033 * , old_old_strain_rates(scratch.old_old_strain_rates)
2036 * old_temperature_values(scratch.old_temperature_values)
2037 * , old_old_temperature_values(scratch.old_old_temperature_values)
2038 * , old_temperature_grads(scratch.old_temperature_grads)
2039 * , old_old_temperature_grads(scratch.old_old_temperature_grads)
2040 * , old_temperature_laplacians(scratch.old_temperature_laplacians)
2041 * , old_old_temperature_laplacians(scratch.old_old_temperature_laplacians)
2043 * } // namespace Scratch
2048 * The CopyData objects are even simpler than the Scratch objects as all
2049 * they have to do is to store the results of local computations until
2050 * they can be copied into the global matrix or vector objects. These
2051 * structures therefore only need to provide a constructor, a copy
2052 * operation, and some arrays for local matrix, local vectors and the
2053 * relation between local and global degrees of freedom (a.k.a.
2054 * <code>local_dof_indices</code>). Again, we have one such structure for
2055 * each of the four operations we will parallelize using the WorkStream
2056 * class:
2059 * namespace CopyData
2061 * template <int dim>
2062 * struct StokesPreconditioner
2064 * StokesPreconditioner(const FiniteElement<dim> &stokes_fe);
2065 * StokesPreconditioner(const StokesPreconditioner &data);
2066 * StokesPreconditioner &operator=(const StokesPreconditioner &) = default;
2068 * FullMatrix<double> local_matrix;
2069 * std::vector<types::global_dof_index> local_dof_indices;
2072 * template <int dim>
2073 * StokesPreconditioner<dim>::StokesPreconditioner(
2074 * const FiniteElement<dim> &stokes_fe)
2075 * : local_matrix(stokes_fe.n_dofs_per_cell(), stokes_fe.n_dofs_per_cell())
2076 * , local_dof_indices(stokes_fe.n_dofs_per_cell())
2079 * template <int dim>
2080 * StokesPreconditioner<dim>::StokesPreconditioner(
2081 * const StokesPreconditioner &data)
2082 * : local_matrix(data.local_matrix)
2083 * , local_dof_indices(data.local_dof_indices)
2088 * template <int dim>
2089 * struct StokesSystem : public StokesPreconditioner<dim>
2091 * StokesSystem(const FiniteElement<dim> &stokes_fe);
2093 * Vector<double> local_rhs;
2096 * template <int dim>
2097 * StokesSystem<dim>::StokesSystem(const FiniteElement<dim> &stokes_fe)
2098 * : StokesPreconditioner<dim>(stokes_fe)
2099 * , local_rhs(stokes_fe.n_dofs_per_cell())
2104 * template <int dim>
2105 * struct TemperatureMatrix
2107 * TemperatureMatrix(const FiniteElement<dim> &temperature_fe);
2109 * FullMatrix<double> local_mass_matrix;
2110 * FullMatrix<double> local_stiffness_matrix;
2111 * std::vector<types::global_dof_index> local_dof_indices;
2114 * template <int dim>
2115 * TemperatureMatrix<dim>::TemperatureMatrix(
2116 * const FiniteElement<dim> &temperature_fe)
2117 * : local_mass_matrix(temperature_fe.n_dofs_per_cell(),
2118 * temperature_fe.n_dofs_per_cell())
2119 * , local_stiffness_matrix(temperature_fe.n_dofs_per_cell(),
2120 * temperature_fe.n_dofs_per_cell())
2121 * , local_dof_indices(temperature_fe.n_dofs_per_cell())
2126 * template <int dim>
2127 * struct TemperatureRHS
2129 * TemperatureRHS(const FiniteElement<dim> &temperature_fe);
2131 * Vector<double> local_rhs;
2132 * std::vector<types::global_dof_index> local_dof_indices;
2133 * FullMatrix<double> matrix_for_bc;
2136 * template <int dim>
2137 * TemperatureRHS<dim>::TemperatureRHS(
2138 * const FiniteElement<dim> &temperature_fe)
2139 * : local_rhs(temperature_fe.n_dofs_per_cell())
2140 * , local_dof_indices(temperature_fe.n_dofs_per_cell())
2141 * , matrix_for_bc(temperature_fe.n_dofs_per_cell(),
2142 * temperature_fe.n_dofs_per_cell())
2144 * } // namespace CopyData
2145 * } // namespace Assembly
2152 * <a name="step_32-ThecodeBoussinesqFlowProblemcodeclasstemplate"></a>
2153 * <h3>The <code>BoussinesqFlowProblem</code> class template</h3>
2157 * This is the declaration of the main class. It is very similar to @ref step_31 "step-31"
2158 * but there are a number of differences we will comment on below.
2162 * The top of the class is essentially the same as in @ref step_31 "step-31", listing the
2163 * public methods and a set of private functions that do the heavy
2164 * lifting. Compared to @ref step_31 "step-31" there are only two additions to this
2165 * section: the function <code>get_cfl_number()</code> that computes the
2166 * maximum CFL number over all cells which we then compute the global time
2167 * step from, and the function <code>get_entropy_variation()</code> that is
2168 * used in the computation of the entropy stabilization. It is akin to the
2169 * <code>get_extrapolated_temperature_range()</code> we have used in @ref step_31 "step-31"
2170 * for this purpose, but works on the entropy instead of the temperature.
2174 * template <int dim>
2175 * class BoussinesqFlowProblem
2178 * struct Parameters;
2179 * BoussinesqFlowProblem(Parameters &parameters); // constructor; takes the Parameters object by non-const reference
2183 * void setup_dofs();
2184 * void assemble_stokes_preconditioner();
2185 * void build_stokes_preconditioner();
2186 * void assemble_stokes_system();
2187 * void assemble_temperature_matrix();
2188 * void assemble_temperature_system(const double maximal_velocity);
2189 * double get_maximal_velocity() const;
2190 * double get_cfl_number() const;
2191 * double get_entropy_variation(const double average_temperature) const;
2192 * std::pair<double, double> get_extrapolated_temperature_range() const;
2194 * void output_results();
2195 * void refine_mesh(const unsigned int max_grid_level);
2197 * double compute_viscosity(
2198 * const std::vector<double> &old_temperature,
2199 * const std::vector<double> &old_old_temperature,
2200 * const std::vector<Tensor<1, dim>> &old_temperature_grads,
2201 * const std::vector<Tensor<1, dim>> &old_old_temperature_grads,
2202 * const std::vector<double> &old_temperature_laplacians,
2203 * const std::vector<double> &old_old_temperature_laplacians,
2204 * const std::vector<Tensor<1, dim>> &old_velocity_values,
2205 * const std::vector<Tensor<1, dim>> &old_old_velocity_values,
2206 * const std::vector<SymmetricTensor<2, dim>> &old_strain_rates,
2207 * const std::vector<SymmetricTensor<2, dim>> &old_old_strain_rates,
2208 * const double global_u_infty,
2209 * const double global_T_variation,
2210 * const double average_temperature,
2211 * const double global_entropy_variation,
2212 * const double cell_diameter) const;
2217 * The first significant new component is the definition of a struct for
2218 * the parameters according to the discussion in the introduction. This
2219 * structure is initialized by reading from a parameter file during
2220 * construction of this object.
2225 * Parameters(const std::string &parameter_filename); // constructor; argument is the name of the parameter file
2227 * static void declare_parameters(ParameterHandler &prm);
2228 * void parse_parameters(ParameterHandler &prm);
2232 * unsigned int initial_global_refinement;
2233 * unsigned int initial_adaptive_refinement;
2235 * bool generate_graphical_output;
2236 * unsigned int graphical_output_interval;
2238 * unsigned int adaptive_refinement_interval;
2240 * double stabilization_alpha;
2241 * double stabilization_c_R;
2242 * double stabilization_beta;
2244 * unsigned int stokes_velocity_degree;
2245 * bool use_locally_conservative_discretization;
2247 * unsigned int temperature_degree;
2251 * Parameters &parameters; // reference member; presumably bound to the constructor argument -- confirm
2255 * The <code>pcout</code> (for <i>%parallel <code>std::cout</code></i>)
2256 * object is used to simplify writing output: each MPI process can use
2257 * this to generate output as usual, but since each of these processes
2258 * will (hopefully) produce the same output it will just be replicated
2259 * many times over; with the ConditionalOStream class, only the output
2260 * generated by one MPI process will actually be printed to screen,
2261 * whereas the output by all the other processes will simply be forgotten.
2264 * ConditionalOStream pcout;
2268 * The following member variables will then again be similar to those in
2269 * @ref step_31 "step-31" (and to other tutorial programs). As mentioned in the
2270 * introduction, we fully distribute computations, so we will have to use
2271 * the parallel::distributed::Triangulation class (see @ref step_40 "step-40") but the
2272 * remainder of these variables is rather standard with two exceptions:
2276 * - The <code>mapping</code> variable is used to denote a higher-order
2277 * polynomial mapping. As mentioned in the introduction, we use this
2278 * mapping when forming integrals through quadrature for all cells.
2282 * - In a bit of naming confusion, you will notice below that some of the
2283 * variables from namespace TrilinosWrappers are taken from namespace
2284 * TrilinosWrappers::MPI (such as the right hand side vectors) whereas
2285 * others are not (such as the various matrices). This is due to legacy
2286 * reasons. We will frequently have to query velocities
2287 * and temperatures at arbitrary quadrature points; consequently, rather
2288 * than importing ghost information of a vector whenever we need access
2289 * to degrees of freedom that are relevant locally but owned by another
2290 * processor, we solve linear systems in %parallel but then immediately
2291 * initialize a vector including ghost entries of the solution for further
2292 * processing. The various <code>*_solution</code> vectors are therefore
2293 * filled immediately after solving their respective linear system in
2294 * %parallel and will always contain values for all
2295 * @ref GlossLocallyRelevantDof "locally relevant degrees of freedom";
2296 * the fully distributed vectors that we obtain from the solution process
2297 * and that only ever contain the
2298 * @ref GlossLocallyOwnedDof "locally owned degrees of freedom" are
2299 * destroyed immediately after the solution process and after we have
2300 * copied the relevant values into the member variable vectors.
2303 * parallel::distributed::Triangulation<dim> triangulation;
2304 * double global_Omega_diameter;
2306 * const MappingQ<dim> mapping;
2308 * const FESystem<dim> stokes_fe;
2309 * DoFHandler<dim> stokes_dof_handler;
2310 * AffineConstraints<double> stokes_constraints;
2312 * TrilinosWrappers::BlockSparseMatrix stokes_matrix;
2313 * TrilinosWrappers::BlockSparseMatrix stokes_preconditioner_matrix;
2315 * TrilinosWrappers::MPI::BlockVector stokes_solution;
2316 * TrilinosWrappers::MPI::BlockVector old_stokes_solution;
2317 * TrilinosWrappers::MPI::BlockVector stokes_rhs;
2320 * const FE_Q<dim> temperature_fe;
2321 * DoFHandler<dim> temperature_dof_handler;
2322 * AffineConstraints<double> temperature_constraints;
2324 * TrilinosWrappers::SparseMatrix temperature_mass_matrix;
2325 * TrilinosWrappers::SparseMatrix temperature_stiffness_matrix;
2326 * TrilinosWrappers::SparseMatrix temperature_matrix;
2328 * TrilinosWrappers::MPI::Vector temperature_solution;
2329 * TrilinosWrappers::MPI::Vector old_temperature_solution;
2330 * TrilinosWrappers::MPI::Vector old_old_temperature_solution;
2331 * TrilinosWrappers::MPI::Vector temperature_rhs;
2335 * double old_time_step;
2336 * unsigned int timestep_number;
2338 * std::shared_ptr<TrilinosWrappers::PreconditionAMG> Amg_preconditioner;
2339 * std::shared_ptr<TrilinosWrappers::PreconditionJacobi> Mp_preconditioner;
2340 * std::shared_ptr<TrilinosWrappers::PreconditionJacobi> T_preconditioner;
2342 * bool rebuild_stokes_matrix;
2343 * bool rebuild_stokes_preconditioner;
2344 * bool rebuild_temperature_matrices;
2345 * bool rebuild_temperature_preconditioner;
2349 * The next member variable, <code>computing_timer</code> is used to
2350 * conveniently account for compute time spent in certain "sections" of
2351 * the code that are repeatedly entered. For example, we will enter (and
2352 * leave) sections for Stokes matrix assembly and would like to accumulate
2353 * the run time spent in this section over all time steps. Every so many
2354 * time steps as well as at the end of the program (through the destructor
2355 * of the TimerOutput class) we will then produce a nice summary of the
2356 * times spent in the different sections into which we categorize the
2357 * run-time of this program.
2360 * TimerOutput computing_timer;
2364 * After these member variables we have a number of auxiliary functions
2365 * that have been broken out of the ones listed above. Specifically, there
2366 * are first three functions that we call from <code>setup_dofs</code> and
2367 * then the ones that do the assembling of linear systems:
2370 * void setup_stokes_matrix(
2371 * const std::vector<IndexSet> &stokes_partitioning,
2372 * const std::vector<IndexSet> &stokes_relevant_partitioning);
2373 * void setup_stokes_preconditioner(
2374 * const std::vector<IndexSet> &stokes_partitioning,
2375 * const std::vector<IndexSet> &stokes_relevant_partitioning);
2376 * void setup_temperature_matrices(
2377 * const IndexSet &temperature_partitioning,
2378 * const IndexSet &temperature_relevant_partitioning);
2383 * Following the @ref MTWorkStream "task-based parallelization" paradigm,
2384 * we split all the assembly routines into two parts: a first part that
2385 * can do all the calculations on a certain cell without taking care of
2386 * other threads, and a second part (which is writing the local data into
2387 * the global matrices and vectors) which can be entered by only one
2388 * thread at a time. In order to implement that, we provide functions for
2389 * each of those two steps for all the four assembly routines that we use
2390 * in this program. The following eight functions do exactly this:
2393 * void local_assemble_stokes_preconditioner(
2394 * const typename DoFHandler<dim>::active_cell_iterator &cell,
2395 * Assembly::Scratch::StokesPreconditioner<dim> &scratch,
2396 * Assembly::CopyData::StokesPreconditioner<dim> &data);
2398 * void copy_local_to_global_stokes_preconditioner(
2399 * const Assembly::CopyData::StokesPreconditioner<dim> &data);
2402 * void local_assemble_stokes_system(
2403 * const typename DoFHandler<dim>::active_cell_iterator &cell,
2404 * Assembly::Scratch::StokesSystem<dim> &scratch,
2405 * Assembly::CopyData::StokesSystem<dim> &data);
2407 * void copy_local_to_global_stokes_system(
2408 * const Assembly::CopyData::StokesSystem<dim> &data);
2411 * void local_assemble_temperature_matrix(
2412 * const typename DoFHandler<dim>::active_cell_iterator &cell,
2413 * Assembly::Scratch::TemperatureMatrix<dim> &scratch,
2414 * Assembly::CopyData::TemperatureMatrix<dim> &data);
2416 * void copy_local_to_global_temperature_matrix(
2417 * const Assembly::CopyData::TemperatureMatrix<dim> &data);
2421 * void local_assemble_temperature_rhs(
2422 * const std::pair<double, double> global_T_range,
2423 * const double global_max_velocity,
2424 * const double global_entropy_variation,
2425 * const typename DoFHandler<dim>::active_cell_iterator &cell,
2426 * Assembly::Scratch::TemperatureRHS<dim> &scratch,
2427 * Assembly::CopyData::TemperatureRHS<dim> &data);
2429 * void copy_local_to_global_temperature_rhs(
2430 * const Assembly::CopyData::TemperatureRHS<dim> &data);
2434 * Finally, we forward declare a member class that we will define later on
2435 * and that will be used to compute a number of quantities from our
2436 * solution vectors that we'd like to put into the output files for
2437 * visualization.
2440 *
class Postprocessor;
2447 * <a name=
"step_32-BoussinesqFlowProblemclassimplementation"></a>
2448 * <h3>BoussinesqFlowProblem
class implementation</h3>
2453 * <a name=
"step_32-BoussinesqFlowProblemParameters"></a>
2454 * <h4>BoussinesqFlowProblem::Parameters</h4>
2458 * Here comes the definition of the parameters
for the Stokes problem. We
2459 * allow the user to set the
end time
for the simulation, the level of refinements
2460 * (both global and adaptive, which in the
sum specify what maximum level
2461 * the cells are allowed to have), and the interval between refinements in
2462 * the time stepping.
2466 * Then, we let the user specify constants
for the stabilization parameters
2467 * (as discussed in the introduction), the polynomial degree
for the Stokes
2468 * velocity space, whether to use the locally conservative discretization
2469 * based on
FE_DGP elements
for the pressure or not (
FE_Q elements
for
2470 * pressure), and the polynomial degree
for the temperature interpolation.
2474 * The constructor checks
for a
valid input file (
if not, a file with
2475 *
default parameters
for the quantities is written), and eventually parses
2479 *
template <
int dim>
2480 * BoussinesqFlowProblem<dim>::Parameters::Parameters(
2481 *
const std::string &parameter_filename)
2483 * , initial_global_refinement(2)
2484 * , initial_adaptive_refinement(2)
2485 * , adaptive_refinement_interval(10)
2486 * , stabilization_alpha(2)
2487 * , stabilization_c_R(0.11)
2488 * , stabilization_beta(0.078)
2489 * , stokes_velocity_degree(2)
2490 * , use_locally_conservative_discretization(
true)
2491 * , temperature_degree(2)
2494 * BoussinesqFlowProblem<dim>::Parameters::declare_parameters(prm);
2496 * std::ifstream parameter_file(parameter_filename);
2498 *
if (!parameter_file)
2500 * parameter_file.close();
2502 * std::ofstream parameter_out(parameter_filename);
2508 *
"Input parameter file <" + parameter_filename +
2509 *
"> not found. Creating a template file of the same name."));
2512 * prm.parse_input(parameter_file);
2513 * parse_parameters(prm);
2520 * Next we have a function that declares the parameters that we expect in
2521 * the input file, together with their data
types,
default values and a
2525 *
template <
int dim>
2526 *
void BoussinesqFlowProblem<dim>::Parameters::declare_parameters(
2529 * prm.declare_entry(
"End time",
2532 *
"The end time of the simulation in years.");
2533 * prm.declare_entry(
"Initial global refinement",
2536 *
"The number of global refinement steps performed on "
2537 *
"the initial coarse mesh, before the problem is first "
2539 * prm.declare_entry(
"Initial adaptive refinement",
2542 *
"The number of adaptive refinement steps performed after "
2543 *
"initial global refinement.");
2544 * prm.declare_entry(
"Time steps between mesh refinement",
2547 *
"The number of time steps after which the mesh is to be "
2548 *
"adapted based on computed error indicators.");
2549 * prm.declare_entry(
"Generate graphical output",
2552 *
"Whether graphical output is to be generated or not. "
2553 *
"You may not want to get graphical output if the number "
2554 *
"of processors is large.");
2555 * prm.declare_entry(
"Time steps between graphical output",
2558 *
"The number of time steps between each generation of "
2559 *
"graphical output files.");
2561 * prm.enter_subsection(
"Stabilization parameters");
2563 * prm.declare_entry(
"alpha",
2566 *
"The exponent in the entropy viscosity stabilization.");
2567 * prm.declare_entry(
"c_R",
2570 *
"The c_R factor in the entropy viscosity "
2571 *
"stabilization.");
2572 * prm.declare_entry(
"beta",
2575 *
"The beta factor in the artificial viscosity "
2576 *
"stabilization. An appropriate value for 2d is 0.052 "
2577 *
"and 0.078 for 3d.");
2579 * prm.leave_subsection();
2581 * prm.enter_subsection(
"Discretization");
2583 * prm.declare_entry(
2584 *
"Stokes velocity polynomial degree",
2587 *
"The polynomial degree to use for the velocity variables "
2588 *
"in the Stokes system.");
2589 * prm.declare_entry(
2590 *
"Temperature polynomial degree",
2593 *
"The polynomial degree to use for the temperature variable.");
2594 * prm.declare_entry(
2595 *
"Use locally conservative discretization",
2598 *
"Whether to use a Stokes discretization that is locally "
2599 *
"conservative at the expense of a larger number of degrees "
2600 *
"of freedom, or to go with a cheaper discretization "
2601 *
"that does not locally conserve mass (although it is "
2602 *
"globally conservative.");
2604 * prm.leave_subsection();
2611 * And then we need a function that reads the contents of the
2613 * results into variables that store the values of the parameters we have
2614 * previously declared:
2617 *
template <
int dim>
2618 *
void BoussinesqFlowProblem<dim>::Parameters::parse_parameters(
2621 * end_time = prm.get_double(
"End time");
2622 * initial_global_refinement = prm.get_integer(
"Initial global refinement");
2623 * initial_adaptive_refinement =
2624 * prm.get_integer(
"Initial adaptive refinement");
2626 * adaptive_refinement_interval =
2627 * prm.get_integer(
"Time steps between mesh refinement");
2629 * generate_graphical_output = prm.get_bool(
"Generate graphical output");
2630 * graphical_output_interval =
2631 * prm.get_integer(
"Time steps between graphical output");
2633 * prm.enter_subsection(
"Stabilization parameters");
2635 * stabilization_alpha = prm.get_double(
"alpha");
2636 * stabilization_c_R = prm.get_double(
"c_R");
2637 * stabilization_beta = prm.get_double(
"beta");
2639 * prm.leave_subsection();
2641 * prm.enter_subsection(
"Discretization");
2643 * stokes_velocity_degree =
2644 * prm.get_integer(
"Stokes velocity polynomial degree");
2645 * temperature_degree = prm.get_integer(
"Temperature polynomial degree");
2646 * use_locally_conservative_discretization =
2647 * prm.get_bool(
"Use locally conservative discretization");
2649 * prm.leave_subsection();
2657 * <a name=
"step_32-BoussinesqFlowProblemBoussinesqFlowProblem"></a>
2658 * <h4>BoussinesqFlowProblem::BoussinesqFlowProblem</h4>
2662 * The constructor of the problem is very similar to the constructor in
2663 * @ref step_31
"step-31". What is different is the %
parallel communication: Trilinos uses
2664 * a message passing interface (
MPI)
for data distribution. When entering
2665 * the BoussinesqFlowProblem
class, we have to decide how the parallelization
2666 * is to be done. We choose a rather simple strategy and let all processors
2667 * that are running the program work together, specified by the communicator
2668 * <code>MPI_COMM_WORLD</code>. Next, we create the output stream (as we
2669 * already did in @ref step_18
"step-18") that only generates output on the first
MPI
2670 * process and is completely forgetful on all others. The implementation of
2671 *
this idea is to
check the process number when <code>pcout</code> gets a
2672 *
true argument, and it uses the <code>std::cout</code> stream
for
2673 * output. If we are
on processor five,
for instance, then we will give a
2674 * <code>
false</code> argument to <code>pcout</code>, which means that the
2675 * output of that processor will not be printed. With the exception of the
2676 *
mapping object (
for which we use polynomials of degree 4) all but the
2677 *
final member variable are exactly the same as in @ref step_31
"step-31".
2681 * This
final object, the
TimerOutput object, is then told to restrict
2682 * output to the <code>pcout</code> stream (processor 0), and then we
2683 * specify that we want to get a summary table at the
end of the program
2684 * which shows us wallclock times (as opposed to CPU times). We will
2685 * manually also request intermediate summaries every so many time steps in
2686 * the <code>
run()</code> function below.
2689 *
template <
int dim>
2690 * BoussinesqFlowProblem<dim>::BoussinesqFlowProblem(Parameters &parameters_)
2691 * : parameters(parameters_)
2695 * triangulation(MPI_COMM_WORLD,
2701 * global_Omega_diameter(0.)
2707 * stokes_fe(
FE_Q<dim>(parameters.stokes_velocity_degree) ^ dim,
2708 * (parameters.use_locally_conservative_discretization ?
2710 *
FE_DGP<dim>(parameters.stokes_velocity_degree - 1)) :
2712 *
FE_Q<dim>(parameters.stokes_velocity_degree - 1))))
2715 * stokes_dof_handler(triangulation)
2718 * temperature_fe(parameters.temperature_degree)
2719 * , temperature_dof_handler(triangulation)
2723 * , old_time_step(0)
2724 * , timestep_number(0)
2725 * , rebuild_stokes_matrix(
true)
2726 * , rebuild_stokes_preconditioner(
true)
2727 * , rebuild_temperature_matrices(
true)
2728 * , rebuild_temperature_preconditioner(
true)
2731 * computing_timer(MPI_COMM_WORLD,
2742 * <a name=
"step_32-TheBoussinesqFlowProblemhelperfunctions"></a>
2743 * <h4>The BoussinesqFlowProblem helper
functions</h4>
2745 * <a name=
"step_32-BoussinesqFlowProblemget_maximal_velocity"></a>
2746 * <h5>BoussinesqFlowProblem::get_maximal_velocity</h5>
2750 * Except
for two small details, the function to compute the global maximum
2751 * of the velocity is the same as in @ref step_31
"step-31". The first detail is actually
2752 * common to all
functions that implement loops over all cells in the
2753 * triangulation: When operating in %
parallel, each processor can only work
2754 * on a chunk of cells since each processor only has a certain part of the
2755 * entire triangulation. This chunk of cells that we want to work on is
2756 * identified via a so-called <code>
subdomain_id</code>, as we also did in
2757 * @ref step_18
"step-18". All we need to change is hence to perform the cell-related
2758 * operations only on cells that are owned by the current process (as
2759 * opposed to ghost or artificial cells), i.e.
for which the subdomain
id
2760 * equals the number of the process ID. Since
this is a commonly used
2761 * operation, there is a shortcut
for this operation: we can ask whether the
2762 * cell is owned by the current processor
using
2763 * <code>cell-@>is_locally_owned()</code>.
2767 * The second difference is the way we calculate the maximum
value. Before,
2768 * we could simply have a <code>
double</code> variable that we checked
2769 * against on each quadrature
point for each cell. Now, we have to be a bit
2770 * more careful since each processor only operates on a subset of
2771 * cells. What we
do is to first let each processor calculate the maximum
2772 * among its cells, and then
do a global communication operation
2774 * all the maximum values of the individual processors.
MPI provides such a
2775 * call, but it
's even simpler to use the respective function in namespace
2776 * Utilities::MPI using the MPI communicator object since that will do the
2777 * right thing even if we work without MPI and on a single machine only. The
2778 * call to <code>Utilities::MPI::max</code> needs two arguments, namely the
2779 * local maximum (input) and the MPI communicator, which is MPI_COMM_WORLD
2783 * template <int dim>
2784 * double BoussinesqFlowProblem<dim>::get_maximal_velocity() const
2786 * const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
2787 * parameters.stokes_velocity_degree);
2788 * const unsigned int n_q_points = quadrature_formula.size();
2790 * FEValues<dim> fe_values(mapping,
2792 * quadrature_formula,
2794 * std::vector<Tensor<1, dim>> velocity_values(n_q_points);
2796 * const FEValuesExtractors::Vector velocities(0);
2798 * double max_local_velocity = 0;
2800 * for (const auto &cell : stokes_dof_handler.active_cell_iterators())
2801 * if (cell->is_locally_owned())
2803 * fe_values.reinit(cell);
2804 * fe_values[velocities].get_function_values(stokes_solution,
2807 * for (unsigned int q = 0; q < n_q_points; ++q)
2808 * max_local_velocity =
2809 * std::max(max_local_velocity, velocity_values[q].norm());
2812 * return Utilities::MPI::max(max_local_velocity, MPI_COMM_WORLD);
2819 * <a name="step_32-BoussinesqFlowProblemget_cfl_number"></a>
2820 * <h5>BoussinesqFlowProblem::get_cfl_number</h5>
2824 * The next function does something similar, but we now compute the CFL
2825 * number, i.e., maximal velocity on a cell divided by the cell
2826 * diameter. This number is necessary to determine the time step size, as we
2827 * use a semi-explicit time stepping scheme for the temperature equation
2828 * (see @ref step_31 "step-31" for a discussion). We compute it in the same way as above:
2829 * Compute the local maximum over all locally owned cells, then exchange it
2830 * via MPI to find the global maximum.
2833 * template <int dim>
2834 * double BoussinesqFlowProblem<dim>::get_cfl_number() const
2836 * const QIterated<dim> quadrature_formula(QTrapezoid<1>(),
2837 * parameters.stokes_velocity_degree);
2838 * const unsigned int n_q_points = quadrature_formula.size();
2840 * FEValues<dim> fe_values(mapping,
2842 * quadrature_formula,
2844 * std::vector<Tensor<1, dim>> velocity_values(n_q_points);
2846 * const FEValuesExtractors::Vector velocities(0);
2848 * double max_local_cfl = 0;
2850 * for (const auto &cell : stokes_dof_handler.active_cell_iterators())
2851 * if (cell->is_locally_owned())
2853 * fe_values.reinit(cell);
2854 * fe_values[velocities].get_function_values(stokes_solution,
2857 * double max_local_velocity = 1e-10;
2858 * for (unsigned int q = 0; q < n_q_points; ++q)
2859 * max_local_velocity =
2860 * std::max(max_local_velocity, velocity_values[q].norm());
2862 * std::max(max_local_cfl, max_local_velocity / cell->diameter());
2865 * return Utilities::MPI::max(max_local_cfl, MPI_COMM_WORLD);
2872 * <a name="step_32-BoussinesqFlowProblemget_entropy_variation"></a>
2873 * <h5>BoussinesqFlowProblem::get_entropy_variation</h5>
2877 * Next comes the computation of the global entropy variation
2878 * @f$\|E(T)-\bar{E}(T)\|_\infty@f$ where the entropy @f$E@f$ is defined as
2879 * discussed in the introduction. This is needed for the evaluation of the
2880 * stabilization in the temperature equation as explained in the
2881 * introduction. The entropy variation is actually only needed if we use
2882 * @f$\alpha=2@f$ as a power in the residual computation. The infinity norm is
2883 * computed by the maxima over quadrature points, as usual in discrete
2888 * In order to compute this quantity, we first have to find the
2889 * space-average @f$\bar{E}(T)@f$ and then evaluate the maximum. However, that
2890 * means that we would need to perform two loops. We can avoid the overhead
2891 * by noting that @f$\|E(T)-\bar{E}(T)\|_\infty =
2892 * \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
2893 * \bar{E}(T)-E_{\textrm{min}}(T)\big)@f$, i.e., the maximum out of the
2894 * deviation from the average entropy in positive and negative
2895 * directions. The four quantities we need for the latter formula (maximum
2896 * entropy, minimum entropy, average entropy, area) can all be evaluated in
2897 * the same loop over all cells, so we choose this simpler variant.
2900 * template <int dim>
2901 * double BoussinesqFlowProblem<dim>::get_entropy_variation(
2902 * const double average_temperature) const
2904 * if (parameters.stabilization_alpha != 2)
2907 * const QGauss<dim> quadrature_formula(parameters.temperature_degree + 1);
2908 * const unsigned int n_q_points = quadrature_formula.size();
2910 * FEValues<dim> fe_values(temperature_fe,
2911 * quadrature_formula,
2912 * update_values | update_JxW_values);
2913 * std::vector<double> old_temperature_values(n_q_points);
2914 * std::vector<double> old_old_temperature_values(n_q_points);
2918 * In the two functions above we computed the maximum of numbers that were
2919 * all non-negative, so we knew that zero was certainly a lower bound. On
2920 * the other hand, here we need to find the maximum deviation from the
2921 * average value, i.e., we will need to know the maximal and minimal
2922 * values of the entropy for which we don't a priori know the
sign.
2926 * To compute it, we can therefore start with the largest and smallest
2927 * possible values we can store in a
double precision number: The minimum
2928 * is initialized with a bigger and the maximum with a smaller number than
2929 * any
one that is going to appear. We are then guaranteed that these
2930 *
numbers will be overwritten in the
loop on the first cell or,
if this
2931 * processor does not own any cells, in the communication step at the
2932 * latest. The following
loop then computes the minimum and maximum local
2933 * entropy as well as keeps track of the area/
volume of the part of the
2934 * domain we locally own and the integral over the entropy on it:
2937 *
double min_entropy = std::numeric_limits<double>::max(),
2938 * max_entropy = std::numeric_limits<double>::lowest(), area = 0,
2939 * entropy_integrated = 0;
2941 *
for (
const auto &cell : temperature_dof_handler.active_cell_iterators())
2942 *
if (cell->is_locally_owned())
2944 * fe_values.reinit(cell);
2945 * fe_values.get_function_values(old_temperature_solution,
2946 * old_temperature_values);
2947 * fe_values.get_function_values(old_old_temperature_solution,
2948 * old_old_temperature_values);
2949 *
for (
unsigned int q = 0; q < n_q_points; ++q)
2952 * (old_temperature_values[q] + old_old_temperature_values[q]) / 2;
2953 *
const double entropy =
2954 * ((T - average_temperature) * (T - average_temperature));
2956 * min_entropy =
std::min(min_entropy, entropy);
2957 * max_entropy =
std::max(max_entropy, entropy);
2958 * area += fe_values.JxW(q);
2959 * entropy_integrated += fe_values.JxW(q) * entropy;
2965 * Now we only need to exchange data between processors: we need to
sum
2966 * the two integrals (<code>area</code>, <code>entropy_integrated</code>),
2967 * and get the extrema
for maximum and minimum. We could
do this through
2968 * four different data exchanges, but we can do it with two:
2970 * values that are all to be summed up. And we can also utilize the
2972 * the minimal entropies equals forming the
negative of the maximum over
2973 * the
negative of the minimal entropies;
this maximum can then be
2974 * combined with forming the maximum over the maximal entropies.
2977 *
const double local_sums[2] = {entropy_integrated, area},
2978 * local_maxima[2] = {-min_entropy, max_entropy};
2979 *
double global_sums[2], global_maxima[2];
2986 * Having computed everything
this way, we can then compute the average
2987 * entropy and find the @f$L^\infty@f$
norm by taking the larger of the
2988 * deviation of the maximum or minimum from the average:
2991 *
const double average_entropy = global_sums[0] / global_sums[1];
2992 *
const double entropy_diff =
std::max(global_maxima[1] - average_entropy,
2993 * average_entropy - (-global_maxima[0]));
2994 *
return entropy_diff;
3002 * <a name=
"step_32-BoussinesqFlowProblemget_extrapolated_temperature_range"></a>
3003 * <h5>BoussinesqFlowProblem::get_extrapolated_temperature_range</h5>
3007 * The next function computes the minimal and maximal
value of the
3008 * extrapolated temperature over the entire domain. Again,
this is only a
3009 * slightly modified version of the respective function in @ref step_31
"step-31". As in
3010 * the function above, we collect local minima and maxima and then compute
3011 * the global extrema
using the same trick as above.
3015 * As already discussed in @ref step_31
"step-31", the function needs to distinguish
3016 * between the first and all following time steps because it uses a higher
3017 * order temperature extrapolation scheme when at least two previous time
3018 * steps are available.
3021 *
template <
int dim>
3022 * std::pair<double, double>
3023 * BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range() const
3026 * parameters.temperature_degree);
3027 *
const unsigned int n_q_points = quadrature_formula.size();
3031 * quadrature_formula,
3033 * std::vector<double> old_temperature_values(n_q_points);
3034 * std::vector<double> old_old_temperature_values(n_q_points);
3036 *
double min_local_temperature = std::numeric_limits<double>::max(),
3037 * max_local_temperature = std::numeric_limits<double>::lowest();
3039 *
if (timestep_number != 0)
3041 *
for (
const auto &cell : temperature_dof_handler.active_cell_iterators())
3042 *
if (cell->is_locally_owned())
3044 * fe_values.reinit(cell);
3045 * fe_values.get_function_values(old_temperature_solution,
3046 * old_temperature_values);
3047 * fe_values.get_function_values(old_old_temperature_solution,
3048 * old_old_temperature_values);
3050 *
for (
unsigned int q = 0; q < n_q_points; ++q)
3052 *
const double temperature =
3053 * (1. + time_step / old_time_step) *
3054 * old_temperature_values[q] -
3055 * time_step / old_time_step * old_old_temperature_values[q];
3057 * min_local_temperature =
3058 *
std::min(min_local_temperature, temperature);
3059 * max_local_temperature =
3060 *
std::max(max_local_temperature, temperature);
3066 *
for (
const auto &cell : temperature_dof_handler.active_cell_iterators())
3067 *
if (cell->is_locally_owned())
3069 * fe_values.reinit(cell);
3070 * fe_values.get_function_values(old_temperature_solution,
3071 * old_temperature_values);
3073 *
for (
unsigned int q = 0; q < n_q_points; ++q)
3075 *
const double temperature = old_temperature_values[q];
3077 * min_local_temperature =
3078 *
std::min(min_local_temperature, temperature);
3079 * max_local_temperature =
3080 *
std::max(max_local_temperature, temperature);
3085 *
double local_extrema[2] = {-min_local_temperature, max_local_temperature};
3086 *
double global_extrema[2];
3089 *
return std::make_pair(-global_extrema[0], global_extrema[1]);
3096 * <a name=
"step_32-BoussinesqFlowProblemcompute_viscosity"></a>
3097 * <h5>BoussinesqFlowProblem::compute_viscosity</h5>
3101 * The function that calculates the viscosity is purely local and so needs
3102 * no communication at all. It is mostly the same as in @ref step_31
"step-31" but with an
3103 * updated formulation of the viscosity
if @f$\alpha=2@f$ is chosen:
3106 *
template <
int dim>
3107 *
double BoussinesqFlowProblem<dim>::compute_viscosity(
3108 *
const std::vector<double> &old_temperature,
3109 *
const std::vector<double> &old_old_temperature,
3112 *
const std::vector<double> &old_temperature_laplacians,
3113 *
const std::vector<double> &old_old_temperature_laplacians,
3118 *
const double global_u_infty,
3119 *
const double global_T_variation,
3120 *
const double average_temperature,
3121 *
const double global_entropy_variation,
3122 *
const double cell_diameter)
const
3124 *
if (global_u_infty == 0)
3125 *
return 5
e-3 * cell_diameter;
3127 *
const unsigned int n_q_points = old_temperature.size();
3129 *
double max_residual = 0;
3130 *
double max_velocity = 0;
3132 *
for (
unsigned int q = 0; q < n_q_points; ++q)
3135 * (old_velocity_values[q] + old_old_velocity_values[q]) / 2;
3138 * (old_strain_rates[q] + old_old_strain_rates[q]) / 2;
3140 *
const double T = (old_temperature[q] + old_old_temperature[q]) / 2;
3141 *
const double dT_dt =
3142 * (old_temperature[q] - old_old_temperature[q]) / old_time_step;
3143 *
const double u_grad_T =
3144 * u * (old_temperature_grads[q] + old_old_temperature_grads[q]) / 2;
3146 *
const double kappa_Delta_T =
3147 * EquationData::kappa *
3148 * (old_temperature_laplacians[q] + old_old_temperature_laplacians[q]) /
3150 *
const double gamma =
3151 * ((EquationData::radiogenic_heating * EquationData::density(T) +
3152 * 2 * EquationData::eta * strain_rate * strain_rate) /
3153 * (EquationData::density(T) * EquationData::specific_heat));
3155 *
double residual =
std::abs(dT_dt + u_grad_T - kappa_Delta_T - gamma);
3156 *
if (parameters.stabilization_alpha == 2)
3157 * residual *=
std::abs(T - average_temperature);
3159 * max_residual =
std::max(residual, max_residual);
3163 *
const double max_viscosity =
3164 * (parameters.stabilization_beta * max_velocity * cell_diameter);
3165 *
if (timestep_number == 0)
3166 *
return max_viscosity;
3169 *
Assert(old_time_step > 0, ExcInternalError());
3171 *
double entropy_viscosity;
3172 *
if (parameters.stabilization_alpha == 2)
3173 * entropy_viscosity =
3174 * (parameters.stabilization_c_R * cell_diameter * cell_diameter *
3175 * max_residual / global_entropy_variation);
3177 * entropy_viscosity =
3178 * (parameters.stabilization_c_R * cell_diameter *
3179 * global_Omega_diameter * max_velocity * max_residual /
3180 * (global_u_infty * global_T_variation));
3182 *
return std::min(max_viscosity, entropy_viscosity);
3191 * <a name=
"step_32-TheBoussinesqFlowProblemsetupfunctions"></a>
3192 * <h4>The BoussinesqFlowProblem setup
functions</h4>
3197 *
for the Stokes preconditioner, and the temperature
matrix. The code is
3198 * mostly the same as in @ref step_31
"step-31", but it has been broken out into three
3199 *
functions of their own
for simplicity.
3203 * The main functional difference between the code here and that in @ref step_31
"step-31"
3204 * is that the matrices we want to set up are distributed across multiple
3205 * processors. Since we still want to build up the sparsity pattern first
3206 *
for efficiency reasons, we could
continue to build the <i>entire</i>
3208 * @ref step_31
"step-31". However, that would be inefficient: every processor would build
3209 * the same sparsity pattern, but only initialize a small part of the
matrix
3210 *
using it. It also violates the principle that every processor should only
3211 * work on those cells it owns (and,
if necessary, the layer of ghost cells
3217 * which is (obviously) a wrapper around a sparsity pattern
object provided
3218 * by Trilinos. The advantage is that the Trilinos sparsity pattern
class
3219 * can communicate across multiple processors:
if this processor fills in
3220 * all the
nonzero entries that result from the cells it owns, and every
3221 * other processor does so as well, then at the
end after some
MPI
3222 * communication initiated by the <code>
compress()</code> call, we will have
3223 * the globally assembled sparsity pattern available with which the global
3224 *
matrix can be initialized.
3228 * There is
one important aspect when initializing Trilinos sparsity
3229 * patterns in
parallel: In addition to specifying the locally owned rows
3230 * and columns of the matrices via the @p stokes_partitioning
index set, we
3231 * also supply information about all the rows we are possibly going to write
3232 * into when assembling on a certain processor. The set of locally relevant
3233 * rows contains all such rows (possibly also a few unnecessary ones, but it
3234 * is difficult to find the exact row indices before actually getting
3235 * indices on all cells and resolving constraints). This additional
3236 * information allows us to exactly determine the structure
for the
3237 * off-processor data found during assembly. While Trilinos matrices are
3238 * able to collect
this information on the fly as well (when initializing
3239 * them from some other reinit method), it is less efficient and leads to
3240 * problems when assembling matrices with multiple threads. In
this program,
3241 * we pessimistically assume that only
one processor at a time can write
3242 * into the
matrix during assembly (whereas the computation is
parallel),
3243 * which is fine
for Trilinos matrices. In practice,
one can
do better by
3244 * hinting
WorkStream at cells that
do not share vertices, allowing
for
3245 * parallelism among those cells (see the graph coloring algorithms and
3246 *
WorkStream with colored iterators argument). However, that only works
3247 * when only
one MPI processor is present because Trilinos
' internal data
3248 * structures for accumulating off-processor data on the fly are not thread
3249 * safe. With the initialization presented here, there is no such problem
3250 * and one could safely introduce graph coloring for this algorithm.
3254 * The only other change we need to make is to tell the
3255 * DoFTools::make_sparsity_pattern() function that it is only supposed to
3256 * work on a subset of cells, namely the ones whose
3257 * <code>subdomain_id</code> equals the number of the current processor, and
3258 * to ignore all other cells.
3262 * This strategy is replicated across all three of the following functions.
3266 * Note that Trilinos matrices store the information contained in the
3267 * sparsity patterns, so we can safely release the <code>sp</code> variable
3268 * once the matrix has been given the sparsity structure.
3271 * template <int dim>
3272 * void BoussinesqFlowProblem<dim>::setup_stokes_matrix(
3273 * const std::vector<IndexSet> &stokes_partitioning,
3274 * const std::vector<IndexSet> &stokes_relevant_partitioning)
3276 * stokes_matrix.clear();
3278 * TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
3279 * stokes_partitioning,
3280 * stokes_relevant_partitioning,
3283 * Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
3284 * for (unsigned int c = 0; c < dim + 1; ++c)
3285 * for (unsigned int d = 0; d < dim + 1; ++d)
3286 * if (!((c == dim) && (d == dim)))
3287 * coupling[c][d] = DoFTools::always;
3289 * coupling[c][d] = DoFTools::none;
3291 * DoFTools::make_sparsity_pattern(stokes_dof_handler,
3294 * stokes_constraints,
3296 * Utilities::MPI::this_mpi_process(
3300 * stokes_matrix.reinit(sp);
3305 * template <int dim>
3306 * void BoussinesqFlowProblem<dim>::setup_stokes_preconditioner(
3307 * const std::vector<IndexSet> &stokes_partitioning,
3308 * const std::vector<IndexSet> &stokes_relevant_partitioning)
3310 * Amg_preconditioner.reset();
3311 * Mp_preconditioner.reset();
3313 * stokes_preconditioner_matrix.clear();
3315 * TrilinosWrappers::BlockSparsityPattern sp(stokes_partitioning,
3316 * stokes_partitioning,
3317 * stokes_relevant_partitioning,
3320 * Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
3321 * for (unsigned int c = 0; c < dim + 1; ++c)
3322 * for (unsigned int d = 0; d < dim + 1; ++d)
3324 * coupling[c][d] = DoFTools::always;
3326 * coupling[c][d] = DoFTools::none;
3328 * DoFTools::make_sparsity_pattern(stokes_dof_handler,
3331 * stokes_constraints,
3333 * Utilities::MPI::this_mpi_process(
3337 * stokes_preconditioner_matrix.reinit(sp);
3341 * template <int dim>
3342 * void BoussinesqFlowProblem<dim>::setup_temperature_matrices(
3343 * const IndexSet &temperature_partitioner,
3344 * const IndexSet &temperature_relevant_partitioner)
3346 * T_preconditioner.reset();
3347 * temperature_mass_matrix.clear();
3348 * temperature_stiffness_matrix.clear();
3349 * temperature_matrix.clear();
3351 * TrilinosWrappers::SparsityPattern sp(temperature_partitioner,
3352 * temperature_partitioner,
3353 * temperature_relevant_partitioner,
3355 * DoFTools::make_sparsity_pattern(temperature_dof_handler,
3357 * temperature_constraints,
3359 * Utilities::MPI::this_mpi_process(
3363 * temperature_matrix.reinit(sp);
3364 * temperature_mass_matrix.reinit(sp);
3365 * temperature_stiffness_matrix.reinit(sp);
3372 * The remainder of the setup function (after splitting out the three
3373 * functions above) mostly has to deal with the things we need to do for
3374 * parallelization across processors. Because setting all of this up is a
3375 * significant compute time expense of the program, we put everything we do
3376 * here into a timer group so that we can get summary information about the
3377 * fraction of time spent in this part of the program at its end.
3381 * At the top as usual we enumerate degrees of freedom and sort them by
3382 * component/block, followed by writing their numbers to the screen from
3383 * processor zero. The DoFHandler::distribute_dofs() function, when applied
3384 * to a parallel::distributed::Triangulation object, sorts degrees of
3385 * freedom in such a way that all degrees of freedom associated with
3386 * subdomain zero come before all those associated with subdomain one,
3387 * etc. For the Stokes part, this entails, however, that velocities and
3388 * pressures become intermixed, but this is trivially solved by sorting
3389 * again by blocks; it is worth noting that this latter operation leaves the
3390 * relative ordering of all velocities and pressures alone, i.e. within the
3391 * velocity block we will still have all those associated with subdomain
3392 * zero before all velocities associated with subdomain one, etc. This is
3393 * important since we store each of the blocks of this matrix distributed
3394 * across all processors and want this to be done in such a way that each
3395 * processor stores that part of the matrix that is roughly equal to the
3396 * degrees of freedom located on those cells that it will actually work on.
3400 * When printing the numbers of degrees of freedom, note that these numbers
3401 * are going to be large if we use many processors. Consequently, we let the
3402 * stream put a comma separator in between every three digits. The state of
3403 * the stream, using the locale, is saved from before to after this
3404 * operation. While slightly opaque, the code works because the default
3405 * locale (which we get using the constructor call
3406 * <code>std::locale("")</code>) implies printing numbers with a comma
3407 * separator for every third digit (i.e., thousands, millions, billions).
3411 * In this function as well as many below, we measure how much time
3412 * we spend here and collect that in a section called "Setup dof
3413 * systems" across function invocations. This is done using a
3414 * TimerOutput::Scope object that gets a timer going in the section
3415 * with the above name of the `computing_timer` object upon construction
3416 * of the local variable; the timer is stopped again when the
3417 * destructor of the `timing_section` variable is called. This, of
3418 * course, happens either at the end of the function, or if we leave
3419 * the function through a `return` statement or when an exception is
3420 * thrown somewhere -- in other words, whenever we leave this
3421 * function in any way. The use of such "scope" objects therefore
3422 * makes sure that we do not have to manually add code that tells
3423 * the timer to stop at every location where this function may be left.
3427 * template <int dim>
3428 * void BoussinesqFlowProblem<dim>::setup_dofs()
3430 * TimerOutput::Scope timing_section(computing_timer, "Setup dof systems");
3432 * stokes_dof_handler.distribute_dofs(stokes_fe);
3434 * std::vector<unsigned int> stokes_sub_blocks(dim + 1, 0);
3435 * stokes_sub_blocks[dim] = 1;
3436 * DoFRenumbering::component_wise(stokes_dof_handler, stokes_sub_blocks);
3438 * temperature_dof_handler.distribute_dofs(temperature_fe);
3440 * const std::vector<types::global_dof_index> stokes_dofs_per_block =
3441 * DoFTools::count_dofs_per_fe_block(stokes_dof_handler, stokes_sub_blocks);
3443 * const types::global_dof_index n_u = stokes_dofs_per_block[0],
3444 * n_p = stokes_dofs_per_block[1],
3445 * n_T = temperature_dof_handler.n_dofs();
3447 * std::locale s = pcout.get_stream().getloc();
3448 * pcout.get_stream().imbue(std::locale(""));
3449 * pcout << "Number of active cells: " << triangulation.n_global_active_cells()
3450 * << " (on " << triangulation.n_levels() << " levels)" << std::endl
3451 * << "Number of degrees of freedom: " << n_u + n_p + n_T << " (" << n_u
3452 * << '+
' << n_p << '+
' << n_T << ')
' << std::endl
3454 * pcout.get_stream().imbue(s);
3459 * After this, we have to set up the various partitioners (of type
3460 * <code>IndexSet</code>, see the introduction) that describe which parts
3461 * of each matrix or vector will be stored where, then call the functions
3462 * that actually set up the matrices, and at the end also resize the
3463 * various vectors we keep around in this program.
3469 * const IndexSet &stokes_locally_owned_index_set =
3470 * stokes_dof_handler.locally_owned_dofs();
3471 * const IndexSet stokes_locally_relevant_set =
3472 * DoFTools::extract_locally_relevant_dofs(stokes_dof_handler);
3474 * std::vector<IndexSet> stokes_partitioning;
3475 * stokes_partitioning.push_back(
3476 * stokes_locally_owned_index_set.get_view(0, n_u));
3477 * stokes_partitioning.push_back(
3478 * stokes_locally_owned_index_set.get_view(n_u, n_u + n_p));
3480 * std::vector<IndexSet> stokes_relevant_partitioning;
3481 * stokes_relevant_partitioning.push_back(
3482 * stokes_locally_relevant_set.get_view(0, n_u));
3483 * stokes_relevant_partitioning.push_back(
3484 * stokes_locally_relevant_set.get_view(n_u, n_u + n_p));
3486 * const IndexSet temperature_partitioning =
3487 * temperature_dof_handler.locally_owned_dofs();
3488 * const IndexSet temperature_relevant_partitioning =
3489 * DoFTools::extract_locally_relevant_dofs(temperature_dof_handler);
3493 * Following this, we can compute constraints for the solution vectors,
3494 * including hanging node constraints and homogeneous and inhomogeneous
3495 * boundary values for the Stokes and temperature fields. Note that as for
3496 * everything else, the constraint objects can not hold <i>all</i>
3497 * constraints on every processor. Rather, each processor needs to store
3498 * only those that are actually necessary for correctness given that it
3499 * only assembles linear systems on cells it owns. As discussed in
3500 * @ref distributed_paper "this paper", the set of constraints we need to
3501 * know about is exactly the set of constraints on all locally relevant
3502 * degrees of freedom, so this is what we use to initialize the constraint objects.
3507 * stokes_constraints.clear();
3508 * stokes_constraints.reinit(stokes_locally_owned_index_set,
3509 * stokes_locally_relevant_set);
3511 * DoFTools::make_hanging_node_constraints(stokes_dof_handler,
3512 * stokes_constraints);
3514 * const FEValuesExtractors::Vector velocity_components(0);
3515 * VectorTools::interpolate_boundary_values(
3516 * stokes_dof_handler,
3518 * Functions::ZeroFunction<dim>(dim + 1),
3519 * stokes_constraints,
3520 * stokes_fe.component_mask(velocity_components));
3522 * std::set<types::boundary_id> no_normal_flux_boundaries;
3523 * no_normal_flux_boundaries.insert(1);
3524 * VectorTools::compute_no_normal_flux_constraints(stokes_dof_handler,
3526 * no_normal_flux_boundaries,
3527 * stokes_constraints,
3529 * stokes_constraints.close();
3532 * temperature_constraints.clear();
3533 * temperature_constraints.reinit(temperature_partitioning,
3534 * temperature_relevant_partitioning);
3536 * DoFTools::make_hanging_node_constraints(temperature_dof_handler,
3537 * temperature_constraints);
3538 * VectorTools::interpolate_boundary_values(
3539 * temperature_dof_handler,
3541 * EquationData::TemperatureInitialValues<dim>(),
3542 * temperature_constraints);
3543 * VectorTools::interpolate_boundary_values(
3544 * temperature_dof_handler,
3546 * EquationData::TemperatureInitialValues<dim>(),
3547 * temperature_constraints);
3548 * temperature_constraints.close();
3553 * All this done, we can then initialize the various matrix and vector
3554 * objects to their proper sizes. At the end, we also record that all
3555 * matrices and preconditioners have to be re-computed at the beginning of
3556 * the next time step. Note how we initialize the vectors for the Stokes
3557 * and temperature right hand sides: These are writable vectors (last
3558 * boolean argument set to @p true) that have the correct one-to-one
3559 * partitioning of locally owned elements but are still given the relevant
3560 * partitioning for means of figuring out the vector entries that are
3561 * going to be set right away. As for matrices, this allows for writing
3562 * local contributions into the vector with multiple threads (always
3563 * assuming that the same vector entry is not accessed by multiple threads
3564 * at the same time). The other vectors only allow for read access of
3565 * individual elements, including ghosts, but are not suitable for solvers.
3569 * setup_stokes_matrix(stokes_partitioning, stokes_relevant_partitioning);
3570 * setup_stokes_preconditioner(stokes_partitioning,
3571 * stokes_relevant_partitioning);
3572 * setup_temperature_matrices(temperature_partitioning,
3573 * temperature_relevant_partitioning);
3575 * stokes_rhs.reinit(stokes_partitioning,
3576 * stokes_relevant_partitioning,
3579 * stokes_solution.reinit(stokes_relevant_partitioning, MPI_COMM_WORLD);
3580 * old_stokes_solution.reinit(stokes_solution);
3582 * temperature_rhs.reinit(temperature_partitioning,
3583 * temperature_relevant_partitioning,
3586 * temperature_solution.reinit(temperature_relevant_partitioning,
3588 * old_temperature_solution.reinit(temperature_solution);
3589 * old_old_temperature_solution.reinit(temperature_solution);
3591 * rebuild_stokes_matrix = true;
3592 * rebuild_stokes_preconditioner = true;
3593 * rebuild_temperature_matrices = true;
3594 * rebuild_temperature_preconditioner = true;
3602 * <a name="step_32-TheBoussinesqFlowProblemassemblyfunctions"></a>
3603 * <h4>The BoussinesqFlowProblem assembly functions</h4>
3607 * Following the discussion in the introduction and in the @ref threads
3608 * topic, we split the assembly functions into different parts:
3612 * <ul> <li> The local calculations of matrices and right hand sides, given
3613 * a certain cell as input (these functions are named
3614 * <code>local_assemble_*</code> below). The resulting function is, in other
3615 * words, essentially the body of the loop over all cells in @ref step_31 "step-31". Note,
3616 * however, that these functions store the result from the local
3617 * calculations in variables of classes from the CopyData namespace.
3621 * <li>These objects are then given to the second step which writes the
3622 * local data into the global data structures (these functions are named
3623 * <code>copy_local_to_global_*</code> below). These functions are pretty trivial.
3628 * <li>These two subfunctions are then used in the respective assembly
3629 * routine (called <code>assemble_*</code> below), where a WorkStream object
3630 * is set up and runs over all the cells that belong to the processor's subdomain. </ul>
3636 * <a name="step_32-Stokespreconditionerassembly"></a>
3637 * <h5>Stokes preconditioner assembly</h5>
3641 * Let us start with the functions that build the Stokes
3642 * preconditioner. The first two of these are pretty trivial, given the
3643 * discussion above. Note in particular that the main point in using the
3644 * scratch data object is that we want to avoid allocating any objects on
3645 * the free space each time we visit a new cell. As a consequence, the
3646 * assembly function below only has automatic local variables, and
3647 * everything else is accessed through the scratch data object, which is
3648 * allocated only once before we start the loop over all cells:
3651 * template <
int dim>
3652 * void BoussinesqFlowProblem<dim>::local_assemble_stokes_preconditioner(
3653 * const typename
DoFHandler<dim>::active_cell_iterator &cell,
3654 * Assembly::Scratch::StokesPreconditioner<dim> &scratch,
3655 * Assembly::CopyData::StokesPreconditioner<dim> &data)
3657 * const unsigned
int dofs_per_cell = stokes_fe.n_dofs_per_cell();
3658 *
const unsigned int n_q_points =
3659 * scratch.stokes_fe_values.n_quadrature_points;
3664 * scratch.stokes_fe_values.reinit(cell);
3665 * cell->get_dof_indices(data.local_dof_indices);
3667 * data.local_matrix = 0;
3669 *
for (
unsigned int q = 0; q < n_q_points; ++q)
3671 *
for (
unsigned int k = 0; k < dofs_per_cell; ++k)
3673 * scratch.grad_phi_u[k] =
3674 * scratch.stokes_fe_values[velocities].gradient(k, q);
3675 * scratch.phi_p[k] = scratch.stokes_fe_values[pressure].value(k, q);
3678 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
3679 *
for (
unsigned int j = 0; j < dofs_per_cell; ++j)
3680 * data.local_matrix(i, j) +=
3681 * (EquationData::eta *
3683 * (1. / EquationData::eta) * EquationData::pressure_scaling *
3684 * EquationData::pressure_scaling *
3685 * (scratch.phi_p[i] * scratch.phi_p[j])) *
3686 * scratch.stokes_fe_values.JxW(q);
3692 *
template <
int dim>
3693 *
void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_preconditioner(
3694 *
const Assembly::CopyData::StokesPreconditioner<dim> &data)
3696 * stokes_constraints.distribute_local_to_global(data.local_matrix,
3697 * data.local_dof_indices,
3698 * stokes_preconditioner_matrix);
3704 * Now for the function that actually puts things together, using the
3705 * WorkStream functions. WorkStream::run needs a start and end iterator to
3706 * enumerate the cells it is supposed to work on. Typically, one would use
3707 * DoFHandler::begin_active() and DoFHandler::end() for that but here we
3708 * actually only want the subset of cells that in fact are owned by the
3709 * current processor. This is where the FilteredIterator class comes into
3710 * play: you give it a range of cells and it provides an iterator that only
3711 * iterates over that subset of cells that satisfy a certain predicate (a
3712 * predicate is a function of one argument that either returns true or
3713 * false). The predicate we use here is IteratorFilters::LocallyOwnedCell,
3714 * i.e., it returns true exactly if the cell is owned by the current
3715 * processor. The resulting iterator range is then exactly what we need.
3719 * With this obstacle out of the way, we call the WorkStream::run
3720 * function with this set of cells, scratch and copy objects, and
3721 * with pointers to two functions: the local assembly and
3722 * copy-local-to-global function. These functions need to have very
3723 * specific signatures: three arguments in the first and one
3724 * argument in the latter case (see the documentation of the
3725 * WorkStream::run function for the meaning of these arguments).
3726 * Note how we use lambda functions to
3727 * create function objects that satisfy this requirement. They use
3728 * function arguments for the local assembly function that specify
3729 * cell, scratch data, and copy data, as well as a function argument
3730 * for the copy function that expects the
3731 * data to be written into the global matrix (also see the discussion in
3732 * @ref step_13 "step-13"'s <code>assemble_linear_system()</code> function). On the other
3733 * hand, the implicit zeroth argument of member functions (namely
3734 * the <code>this</code> pointer of the object on which that member
3735 * function is to operate) is <i>bound</i> to the
3736 * <code>this</code> pointer of the current function and is captured. The
3737 * WorkStream::run function, as a consequence, does not need to know
3738 * anything about the object these functions work on.
3742 * When the WorkStream is executed, it will create several local assembly
3743 * routines of the first kind for several cells and let some available
3744 * processors work on them. The function that needs to be synchronized,
3745 * i.e., the write operation into the global matrix, however, is executed by
3746 * only one thread at a time in the prescribed order. Of course, this only
3747 * holds for the parallelization on a single MPI process. Different MPI
3748 * processes will have their own WorkStream objects and do that work
3749 * completely independently (and in different memory spaces). In a
3750 * distributed calculation, some data will accumulate at degrees of freedom
3751 * that are not owned by the respective processor. It would be inefficient
3752 * to send data around every time we encounter such a dof. What happens
3753 * instead is that the Trilinos sparse matrix will keep that data and send
3754 * it to the owner at the end of assembly, by calling the
3755 * <code>compress()</code> command.
3758 * template <
int dim>
3759 *
void BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner()
3761 * stokes_preconditioner_matrix = 0;
3763 *
const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
3765 *
using CellFilter =
3766 * FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>;
3770 * Assembly::Scratch::StokesPreconditioner<dim> &scratch,
3771 * Assembly::CopyData::StokesPreconditioner<dim> &data) {
3772 * this->local_assemble_stokes_preconditioner(cell, scratch, data);
3776 * [
this](
const Assembly::CopyData::StokesPreconditioner<dim> &data) {
3777 * this->copy_local_to_global_stokes_preconditioner(data);
3781 * stokes_dof_handler.begin_active()),
3782 * CellFilter(IteratorFilters::LocallyOwnedCell(),
3783 * stokes_dof_handler.end()),
3786 * Assembly::Scratch::StokesPreconditioner<dim>(
3788 * quadrature_formula,
3791 * Assembly::CopyData::StokesPreconditioner<dim>(stokes_fe));
3800 * The final function in this block initiates assembly of the Stokes
3801 * preconditioner matrix and then in fact builds the Stokes
3802 * preconditioner. It is mostly the same as in the serial case. The only
3803 * difference to @ref step_31 "step-31" is that we use a Jacobi preconditioner for the
3804 * pressure mass matrix instead of IC, as discussed in the introduction.
3807 *
template <
int dim>
3808 *
void BoussinesqFlowProblem<dim>::build_stokes_preconditioner()
3810 *
if (rebuild_stokes_preconditioner ==
false)
3814 *
" Build Stokes preconditioner");
3815 * pcout <<
" Rebuilding Stokes preconditioner..." << std::flush;
3817 * assemble_stokes_preconditioner();
3820 *
const std::vector<std::vector<bool>> constant_modes =
3822 * stokes_dof_handler, stokes_fe.component_mask(velocity_components));
3824 * Mp_preconditioner =
3825 * std::make_shared<TrilinosWrappers::PreconditionJacobi>();
3826 * Amg_preconditioner = std::make_shared<TrilinosWrappers::PreconditionAMG>();
3830 * Amg_data.elliptic =
true;
3831 * Amg_data.higher_order_elements =
true;
3832 * Amg_data.smoother_sweeps = 2;
3833 * Amg_data.aggregation_threshold = 0.02;
3835 * Mp_preconditioner->initialize(stokes_preconditioner_matrix.block(1, 1));
3836 * Amg_preconditioner->initialize(stokes_preconditioner_matrix.block(0, 0),
3839 * rebuild_stokes_preconditioner =
false;
3841 * pcout << std::endl;
3848 * <a name="step_32-Stokessystemassembly"></a>
3849 * <h5>Stokes system assembly</h5>
3853 * The next three functions implement the assembly of the Stokes system,
3854 * again split up into a part performing local calculations, one for writing
3855 * the local data into the global matrix and vector, and one for actually
3856 * running the loop over all cells with the help of the WorkStream
3857 * class. Note that the assembly of the Stokes matrix needs only to be done
3858 * in case we have changed the mesh. Otherwise, just the
3859 * (temperature-dependent) right hand side needs to be calculated
3860 * here. Since we are working with distributed matrices and vectors, we have
3861 * to call the respective <code>compress()</code> functions in the end of
3862 * the assembly in order to send non-local data to the owner process.
3865 *
template <
int dim>
3866 *
void BoussinesqFlowProblem<dim>::local_assemble_stokes_system(
3868 * Assembly::Scratch::StokesSystem<dim> &scratch,
3869 * Assembly::CopyData::StokesSystem<dim> &data)
3871 *
const unsigned int dofs_per_cell =
3872 * scratch.stokes_fe_values.get_fe().n_dofs_per_cell();
3873 *
const unsigned int n_q_points =
3874 * scratch.stokes_fe_values.n_quadrature_points;
3879 * scratch.stokes_fe_values.reinit(cell);
3882 * cell->as_dof_handler_iterator(temperature_dof_handler);
3883 * scratch.temperature_fe_values.reinit(temperature_cell);
3885 *
if (rebuild_stokes_matrix)
3886 * data.local_matrix = 0;
3887 * data.local_rhs = 0;
3889 * scratch.temperature_fe_values.get_function_values(
3890 * old_temperature_solution, scratch.old_temperature_values);
3892 *
for (
unsigned int q = 0; q < n_q_points; ++q)
3894 *
const double old_temperature = scratch.old_temperature_values[q];
3896 *
for (
unsigned int k = 0; k < dofs_per_cell; ++k)
3898 * scratch.phi_u[k] = scratch.stokes_fe_values[velocities].value(k, q);
3899 *
if (rebuild_stokes_matrix)
3901 * scratch.grads_phi_u[k] =
3902 * scratch.stokes_fe_values[velocities].symmetric_gradient(k, q);
3903 * scratch.div_phi_u[k] =
3904 * scratch.stokes_fe_values[velocities].divergence(k, q);
3905 * scratch.phi_p[k] =
3906 * scratch.stokes_fe_values[pressure].value(k, q);
3910 *
if (rebuild_stokes_matrix ==
true)
3911 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
3912 *
for (
unsigned int j = 0; j < dofs_per_cell; ++j)
3913 * data.local_matrix(i, j) +=
3914 * (EquationData::eta * 2 *
3915 * (scratch.grads_phi_u[i] * scratch.grads_phi_u[j]) -
3916 * (EquationData::pressure_scaling * scratch.div_phi_u[i] *
3917 * scratch.phi_p[j]) -
3918 * (EquationData::pressure_scaling * scratch.phi_p[i] *
3919 * scratch.div_phi_u[j])) *
3920 * scratch.stokes_fe_values.JxW(q);
3923 * scratch.stokes_fe_values.quadrature_point(q));
3925 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
3926 * data.local_rhs(i) += (EquationData::density(old_temperature) *
3927 * gravity * scratch.phi_u[i]) *
3928 * scratch.stokes_fe_values.JxW(q);
3931 * cell->get_dof_indices(data.local_dof_indices);
3936 *
template <
int dim>
3937 *
void BoussinesqFlowProblem<dim>::copy_local_to_global_stokes_system(
3938 *
const Assembly::CopyData::StokesSystem<dim> &data)
3940 *
if (rebuild_stokes_matrix ==
true)
3941 * stokes_constraints.distribute_local_to_global(data.local_matrix,
3943 * data.local_dof_indices,
3947 * stokes_constraints.distribute_local_to_global(data.local_rhs,
3948 * data.local_dof_indices,
3954 *
template <
int dim>
3955 *
void BoussinesqFlowProblem<dim>::assemble_stokes_system()
3958 *
" Assemble Stokes system");
3960 *
if (rebuild_stokes_matrix ==
true)
3961 * stokes_matrix = 0;
3965 *
const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree + 1);
3967 *
using CellFilter =
3972 * stokes_dof_handler.begin_active()),
3975 * Assembly::Scratch::StokesSystem<dim> &scratch,
3976 * Assembly::CopyData::StokesSystem<dim> &data) {
3977 * this->local_assemble_stokes_system(cell, scratch, data);
3979 * [
this](
const Assembly::CopyData::StokesSystem<dim> &data) {
3980 * this->copy_local_to_global_stokes_system(data);
3982 * Assembly::Scratch::StokesSystem<dim>(
3985 * quadrature_formula,
3990 * Assembly::CopyData::StokesSystem<dim>(stokes_fe));
3992 *
if (rebuild_stokes_matrix ==
true)
3996 * rebuild_stokes_matrix =
false;
3998 * pcout << std::endl;
4005 * <a name="step_32-Temperaturematrixassembly"></a>
4006 * <h5>Temperature matrix assembly</h5>
4010 * The task to be performed by the next three functions is to calculate a
4011 * mass matrix and a Laplace matrix on the temperature system. These will be
4012 * combined in order to yield the semi-implicit time stepping matrix that
4013 * consists of the mass matrix plus a time step-dependent weight factor
4014 * times the Laplace matrix. This function is again essentially the body of
4015 * the loop over all cells from @ref step_31 "step-31".
4019 * The two following functions perform similar services as the ones above.
4022 *
template <
int dim>
4023 *
void BoussinesqFlowProblem<dim>::local_assemble_temperature_matrix(
4025 * Assembly::Scratch::TemperatureMatrix<dim> &scratch,
4026 * Assembly::CopyData::TemperatureMatrix<dim> &data)
4028 *
const unsigned int dofs_per_cell =
4029 * scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
4030 *
const unsigned int n_q_points =
4031 * scratch.temperature_fe_values.n_quadrature_points;
4033 * scratch.temperature_fe_values.reinit(cell);
4034 * cell->get_dof_indices(data.local_dof_indices);
4036 * data.local_mass_matrix = 0;
4037 * data.local_stiffness_matrix = 0;
4039 *
for (
unsigned int q = 0; q < n_q_points; ++q)
4041 *
for (
unsigned int k = 0; k < dofs_per_cell; ++k)
4043 * scratch.grad_phi_T[k] =
4044 * scratch.temperature_fe_values.shape_grad(k, q);
4045 * scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
4048 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
4049 *
for (
unsigned int j = 0; j < dofs_per_cell; ++j)
4051 * data.local_mass_matrix(i, j) +=
4052 * (scratch.phi_T[i] * scratch.phi_T[j] *
4053 * scratch.temperature_fe_values.JxW(q));
4054 * data.local_stiffness_matrix(i, j) +=
4055 * (EquationData::kappa * scratch.grad_phi_T[i] *
4056 * scratch.grad_phi_T[j] * scratch.temperature_fe_values.JxW(q));
4063 *
template <
int dim>
4064 *
void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_matrix(
4065 *
const Assembly::CopyData::TemperatureMatrix<dim> &data)
4067 * temperature_constraints.distribute_local_to_global(data.local_mass_matrix,
4068 * data.local_dof_indices,
4069 * temperature_mass_matrix);
4070 * temperature_constraints.distribute_local_to_global(
4071 * data.local_stiffness_matrix,
4072 * data.local_dof_indices,
4073 * temperature_stiffness_matrix);
4077 *
template <
int dim>
4078 *
void BoussinesqFlowProblem<dim>::assemble_temperature_matrix()
4080 *
if (rebuild_temperature_matrices ==
false)
4084 *
" Assemble temperature matrices");
4085 * temperature_mass_matrix = 0;
4086 * temperature_stiffness_matrix = 0;
4088 *
const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
4090 *
using CellFilter =
4095 * temperature_dof_handler.begin_active()),
4097 * temperature_dof_handler.end()),
4099 * Assembly::Scratch::TemperatureMatrix<dim> &scratch,
4100 * Assembly::CopyData::TemperatureMatrix<dim> &data) {
4101 * this->local_assemble_temperature_matrix(cell, scratch, data);
4103 * [
this](
const Assembly::CopyData::TemperatureMatrix<dim> &data) {
4104 * this->copy_local_to_global_temperature_matrix(data);
4106 * Assembly::Scratch::TemperatureMatrix<dim>(temperature_fe,
4108 * quadrature_formula),
4109 * Assembly::CopyData::TemperatureMatrix<dim>(temperature_fe));
4114 * rebuild_temperature_matrices =
false;
4115 * rebuild_temperature_preconditioner =
true;
4122 * <a name="step_32-Temperaturerighthandsideassembly"></a>
4123 * <h5>Temperature right hand side assembly</h5>
4127 * This is the last assembly function. It calculates the right hand side of
4128 * the temperature system, which includes the convection and the
4129 * stabilization terms. It includes a lot of evaluations of old solutions at
4130 * the quadrature points (which are necessary for calculating the artificial
4131 * viscosity of stabilization), but is otherwise similar to the other
4132 * assembly functions. Notice, once again, how we resolve the dilemma of
4133 * having inhomogeneous boundary conditions, by just making a right hand
4134 * side at this point (compare the comments for the <code>project()</code>
4135 * function above): We create some matrix columns with exactly the values
4136 * that would be entered for the temperature @ref GlossStiffnessMatrix "stiffness matrix", in case we
4137 * have inhomogeneously constrained dofs. That will account for the correct
4138 * balance of the right hand side vector with the matrix system of temperature.
4142 * template <
int dim>
4143 * void BoussinesqFlowProblem<dim>::local_assemble_temperature_rhs(
4144 * const
std::pair<double, double> global_T_range,
4145 * const double global_max_velocity,
4146 * const double global_entropy_variation,
4147 * const typename
DoFHandler<dim>::active_cell_iterator &cell,
4148 * Assembly::Scratch::TemperatureRHS<dim> &scratch,
4149 * Assembly::CopyData::TemperatureRHS<dim> &data)
4151 * const
bool use_bdf2_scheme = (timestep_number != 0);
4153 *
const unsigned int dofs_per_cell =
4154 * scratch.temperature_fe_values.get_fe().n_dofs_per_cell();
4155 *
const unsigned int n_q_points =
4156 * scratch.temperature_fe_values.n_quadrature_points;
4160 * data.local_rhs = 0;
4161 * data.matrix_for_bc = 0;
4162 * cell->get_dof_indices(data.local_dof_indices);
4164 * scratch.temperature_fe_values.
reinit(cell);
4167 * cell->as_dof_handler_iterator(stokes_dof_handler);
4168 * scratch.stokes_fe_values.
reinit(stokes_cell);
4170 * scratch.temperature_fe_values.get_function_values(
4171 * old_temperature_solution, scratch.old_temperature_values);
4172 * scratch.temperature_fe_values.get_function_values(
4173 * old_old_temperature_solution, scratch.old_old_temperature_values);
4175 * scratch.temperature_fe_values.get_function_gradients(
4176 * old_temperature_solution, scratch.old_temperature_grads);
4177 * scratch.temperature_fe_values.get_function_gradients(
4178 * old_old_temperature_solution, scratch.old_old_temperature_grads);
4180 * scratch.temperature_fe_values.get_function_laplacians(
4181 * old_temperature_solution, scratch.old_temperature_laplacians);
4182 * scratch.temperature_fe_values.get_function_laplacians(
4183 * old_old_temperature_solution, scratch.old_old_temperature_laplacians);
4185 * scratch.stokes_fe_values[velocities].get_function_values(
4186 * stokes_solution, scratch.old_velocity_values);
4187 * scratch.stokes_fe_values[velocities].get_function_values(
4188 * old_stokes_solution, scratch.old_old_velocity_values);
4189 * scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
4190 * stokes_solution, scratch.old_strain_rates);
4191 * scratch.stokes_fe_values[velocities].get_function_symmetric_gradients(
4192 * old_stokes_solution, scratch.old_old_strain_rates);
4195 * compute_viscosity(scratch.old_temperature_values,
4196 * scratch.old_old_temperature_values,
4197 * scratch.old_temperature_grads,
4198 * scratch.old_old_temperature_grads,
4199 * scratch.old_temperature_laplacians,
4200 * scratch.old_old_temperature_laplacians,
4201 * scratch.old_velocity_values,
4202 * scratch.old_old_velocity_values,
4203 * scratch.old_strain_rates,
4204 * scratch.old_old_strain_rates,
4205 * global_max_velocity,
4206 * global_T_range.second - global_T_range.first,
4207 * 0.5 * (global_T_range.second + global_T_range.first),
4208 * global_entropy_variation,
4209 * cell->diameter());
4211 *
for (
unsigned int q = 0; q < n_q_points; ++q)
4213 *
for (
unsigned int k = 0; k < dofs_per_cell; ++k)
4215 * scratch.phi_T[k] = scratch.temperature_fe_values.shape_value(k, q);
4216 * scratch.grad_phi_T[k] =
4217 * scratch.temperature_fe_values.shape_grad(k, q);
4221 *
const double T_term_for_rhs =
4222 * (use_bdf2_scheme ?
4223 * (scratch.old_temperature_values[q] *
4224 * (1 + time_step / old_time_step) -
4225 * scratch.old_old_temperature_values[q] * (time_step * time_step) /
4226 * (old_time_step * (time_step + old_time_step))) :
4227 * scratch.old_temperature_values[q]);
4229 *
const double ext_T =
4230 * (use_bdf2_scheme ? (scratch.old_temperature_values[q] *
4231 * (1 + time_step / old_time_step) -
4232 * scratch.old_old_temperature_values[q] *
4233 * time_step / old_time_step) :
4234 * scratch.old_temperature_values[q]);
4237 * (use_bdf2_scheme ? (scratch.old_temperature_grads[q] *
4238 * (1 + time_step / old_time_step) -
4239 * scratch.old_old_temperature_grads[q] * time_step /
4241 * scratch.old_temperature_grads[q]);
4244 * (use_bdf2_scheme ?
4245 * (scratch.old_velocity_values[q] * (1 + time_step / old_time_step) -
4246 * scratch.old_old_velocity_values[q] * time_step / old_time_step) :
4247 * scratch.old_velocity_values[q]);
4250 * (use_bdf2_scheme ?
4251 * (scratch.old_strain_rates[q] * (1 + time_step / old_time_step) -
4252 * scratch.old_old_strain_rates[q] * time_step / old_time_step) :
4253 * scratch.old_strain_rates[q]);
4255 *
const double gamma =
4256 * ((EquationData::radiogenic_heating * EquationData::density(ext_T) +
4257 * 2 * EquationData::eta * extrapolated_strain_rate *
4258 * extrapolated_strain_rate) /
4259 * (EquationData::density(ext_T) * EquationData::specific_heat));
4261 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
4263 * data.local_rhs(i) +=
4264 * (T_term_for_rhs * scratch.phi_T[i] -
4265 * time_step * extrapolated_u * ext_grad_T * scratch.phi_T[i] -
4266 * time_step * nu * ext_grad_T * scratch.grad_phi_T[i] +
4267 * time_step * gamma * scratch.phi_T[i]) *
4268 * scratch.temperature_fe_values.JxW(q);
4270 *
if (temperature_constraints.is_inhomogeneously_constrained(
4271 * data.local_dof_indices[i]))
4273 *
for (
unsigned int j = 0; j < dofs_per_cell; ++j)
4274 * data.matrix_for_bc(j, i) +=
4275 * (scratch.phi_T[i] * scratch.phi_T[j] *
4276 * (use_bdf2_scheme ? ((2 * time_step + old_time_step) /
4277 * (time_step + old_time_step)) :
4279 * scratch.grad_phi_T[i] * scratch.grad_phi_T[j] *
4280 * EquationData::kappa * time_step) *
4281 * scratch.temperature_fe_values.JxW(q);
4288 *
template <
int dim>
4289 *
void BoussinesqFlowProblem<dim>::copy_local_to_global_temperature_rhs(
4290 *
const Assembly::CopyData::TemperatureRHS<dim> &data)
4292 * temperature_constraints.distribute_local_to_global(data.local_rhs,
4293 * data.local_dof_indices,
4295 * data.matrix_for_bc);
4302 * In the function that runs the WorkStream for actually calculating the
4303 * right hand side, we also generate the final matrix. As mentioned above,
4304 * it is a sum of the mass matrix and the Laplace matrix, times some time
4305 * step-dependent weight. This weight is specified by the BDF-2 time
4306 * integration scheme, see the introduction in @ref step_31 "step-31". What is new in this
4307 * tutorial program (in addition to the use of MPI parallelization and the
4308 * WorkStream class), is that we now precompute the temperature
4309 * preconditioner as well. The reason is that the setup of the Jacobi
4310 * preconditioner takes a noticeable time compared to the solver because we
4311 * usually only need between 10 and 20 iterations for solving the
4312 * temperature system (this might sound strange, as Jacobi really only
4313 * consists of a diagonal, but in Trilinos it is derived from a more general
4314 * framework for point relaxation preconditioners which is a bit
4315 * inefficient). Hence, it is more efficient to precompute the
4316 * preconditioner, even though the matrix entries may slightly change
4317 * because the time step might change. This is not too big a problem because
4318 * we remesh every few time steps (and regenerate the preconditioner then).
4321 *
template <
int dim>
4322 *
void BoussinesqFlowProblem<dim>::assemble_temperature_system(
4323 *
const double maximal_velocity)
4325 *
const bool use_bdf2_scheme = (timestep_number != 0);
4327 *
if (use_bdf2_scheme ==
true)
4329 * temperature_matrix.copy_from(temperature_mass_matrix);
4330 * temperature_matrix *=
4331 * (2 * time_step + old_time_step) / (time_step + old_time_step);
4332 * temperature_matrix.add(time_step, temperature_stiffness_matrix);
4336 * temperature_matrix.copy_from(temperature_mass_matrix);
4337 * temperature_matrix.add(time_step, temperature_stiffness_matrix);
4340 *
if (rebuild_temperature_preconditioner ==
true)
4342 * T_preconditioner =
4343 * std::make_shared<TrilinosWrappers::PreconditionJacobi>();
4344 * T_preconditioner->initialize(temperature_matrix);
4345 * rebuild_temperature_preconditioner =
false;
4350 * The next part is computing the right hand side vectors. To do so, we
4351 * first compute the average temperature @f$T_m@f$ that we use for evaluating
4352 * the artificial viscosity stabilization through the residual @f$E(T) =
4353 * (T-T_m)^2@f$. We do this by defining the midpoint between maximum and
4354 * minimum temperature as average temperature in the definition of the
4355 * entropy viscosity. An alternative would be to use the integral average,
4356 * but the results are not very sensitive to this choice. The rest then
4357 * only requires calling WorkStream::run again, binding the arguments to
4358 * the <code>local_assemble_temperature_rhs</code> function that are the
4359 * same in every call to the correct values:
4362 * temperature_rhs = 0;
4364 *
const QGauss<dim> quadrature_formula(parameters.temperature_degree + 2);
4365 *
const std::pair<double, double> global_T_range =
4366 * get_extrapolated_temperature_range();
4368 *
const double average_temperature =
4369 * 0.5 * (global_T_range.first + global_T_range.second);
4370 *
const double global_entropy_variation =
4371 * get_entropy_variation(average_temperature);
4373 *
using CellFilter =
4377 * [
this, global_T_range, maximal_velocity, global_entropy_variation](
4379 * Assembly::Scratch::TemperatureRHS<dim> &scratch,
4380 * Assembly::CopyData::TemperatureRHS<dim> &data) {
4381 * this->local_assemble_temperature_rhs(global_T_range,
4383 * global_entropy_variation,
4389 *
auto copier = [
this](
const Assembly::CopyData::TemperatureRHS<dim> &data) {
4390 * this->copy_local_to_global_temperature_rhs(data);
4394 * temperature_dof_handler.begin_active()),
4396 * temperature_dof_handler.end()),
4399 * Assembly::Scratch::TemperatureRHS<dim>(
4400 * temperature_fe, stokes_fe,
mapping, quadrature_formula),
4401 * Assembly::CopyData::TemperatureRHS<dim>(temperature_fe));
4411 * <a name="step_32-BoussinesqFlowProblemsolve"></a>
4412 * <h4>BoussinesqFlowProblem::solve</h4>
4416 * This function solves the linear systems in each time step of the
4417 * Boussinesq problem. First, we work on the Stokes system and then on the
4418 * temperature system. In essence, it does the same things as the respective
4419 * function in @ref step_31 "step-31". However, there are a few changes here.
4423 * The first change is related to the way we store our solution: we keep the
4424 * vectors with locally owned degrees of freedom plus ghost nodes on each
4425 * MPI node. When we enter a solver which is supposed to perform
4426 * matrix-vector products with a distributed matrix, this is not the
4427 * appropriate form, though. There, we will want to have the solution vector
4428 * to be distributed in the same way as the matrix, i.e. without any
4429 * ghosts. So what we do first is to generate a distributed vector called
4430 * <code>distributed_stokes_solution</code> and put only the locally owned
4431 * dofs into that, which is neatly done by the <code>operator=</code> of the
4436 * Next, we scale the pressure solution (or rather, the initial guess) for
4437 * the solver so that it matches the length scales in the matrices, as
4438 * discussed in the introduction. We also immediately scale the pressure
4439 * solution back to the correct units after the solution is completed. We
4440 * also need to set the pressure values at hanging nodes to zero. This we
4441 * also did in @ref step_31 "step-31" in order not to disturb the Schur complement by some
4442 * vector entries that actually are irrelevant during the solve stage. In
4443 * contrast to @ref step_31 "step-31", here we do it only for the locally owned pressure
4444 * dofs. After solving for the Stokes solution, each processor copies the
4445 * distributed solution back into the solution vector that also includes
4450 * The third and most obvious change is that we have two variants for the
4451 * Stokes solver: A fast solver that sometimes breaks down, and a robust
4452 * solver that is slower. This is what we already discussed in the
4453 * introduction. Here is how we realize it: First, we perform 30 iterations
4454 * with the fast solver based on the simple preconditioner based on the AMG
4455 * V-cycle instead of an approximate solve (this is indicated by the
4456 * <code>false</code> argument to the
4457 * <code>LinearSolvers::BlockSchurPreconditioner</code> object). If we
4458 * converge, everything is fine. If we do not converge, the solver control
4459 * object will throw an exception SolverControl::NoConvergence. Usually,
4460 * this would abort the program because we don't catch them in our usual
4461 * <code>solve()</code> functions. This is certainly not what we want to
4462 * happen here. Rather, we want to switch to the strong solver and continue
4463 * the solution process with whatever vector we got so far. Hence, we catch
4464 * the exception with the C++ try/catch mechanism. We then simply go through
4465 * the same solver sequence again in the <code>catch</code> clause, this
4466 * time passing the @p true flag to the preconditioner for the strong
4467 * solver, signaling an approximate CG solve.
4470 * template <int dim>
4471 * void BoussinesqFlowProblem<dim>::solve()
4474 * TimerOutput::Scope timer_section(computing_timer,
4475 * " Solve Stokes system");
4477 * pcout << " Solving Stokes system... " << std::flush;
4479 * TrilinosWrappers::MPI::BlockVector distributed_stokes_solution(
4481 * distributed_stokes_solution = stokes_solution;
4483 * distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
4485 * const unsigned int
4486 * start = (distributed_stokes_solution.block(0).size() +
4487 * distributed_stokes_solution.block(1).local_range().first),
4488 * end = (distributed_stokes_solution.block(0).size() +
4489 * distributed_stokes_solution.block(1).local_range().second);
4490 * for (unsigned int i = start; i < end; ++i)
4491 * if (stokes_constraints.is_constrained(i))
4492 * distributed_stokes_solution(i) = 0;
4495 * PrimitiveVectorMemory<TrilinosWrappers::MPI::BlockVector> mem;
4497 * unsigned int n_iterations = 0;
4498 * const double solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
4499 * SolverControl solver_control(30, solver_tolerance);
4503 * const LinearSolvers::BlockSchurPreconditioner<
4504 * TrilinosWrappers::PreconditionAMG,
4505 * TrilinosWrappers::PreconditionJacobi>
4506 * preconditioner(stokes_matrix,
4507 * stokes_preconditioner_matrix,
4508 * *Mp_preconditioner,
4509 * *Amg_preconditioner,
4512 * SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
4515 * SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
4517 * solver.solve(stokes_matrix,
4518 * distributed_stokes_solution,
4522 * n_iterations = solver_control.last_step();
4525 * catch (SolverControl::NoConvergence &)
4527 * const LinearSolvers::BlockSchurPreconditioner<
4528 * TrilinosWrappers::PreconditionAMG,
4529 * TrilinosWrappers::PreconditionJacobi>
4530 * preconditioner(stokes_matrix,
4531 * stokes_preconditioner_matrix,
4532 * *Mp_preconditioner,
4533 * *Amg_preconditioner,
4536 * SolverControl solver_control_refined(stokes_matrix.m(),
4537 * solver_tolerance);
4538 * SolverFGMRES<TrilinosWrappers::MPI::BlockVector> solver(
4539 * solver_control_refined,
4541 * SolverFGMRES<TrilinosWrappers::MPI::BlockVector>::AdditionalData(
4543 * solver.solve(stokes_matrix,
4544 * distributed_stokes_solution,
4549 * (solver_control.last_step() + solver_control_refined.last_step());
4553 * stokes_constraints.distribute(distributed_stokes_solution);
4555 * distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
4557 * stokes_solution = distributed_stokes_solution;
4558 * pcout << n_iterations << " iterations." << std::endl;
4564 * Now let's turn to the temperature part: First, we compute the time step
4565 * size. We found that we need smaller time steps for 3d than for 2d for
4566 * the shell geometry. This is because the cells are more distorted in
4567 * that case (it is the smallest edge length that determines the CFL
4568 * number). Instead of computing the time step from maximum velocity and
4569 * minimal mesh size as in @ref step_31 "step-31", we compute local CFL numbers, i.e., on
4570 * each cell we compute the maximum velocity times the mesh size, and
4571 * compute the maximum of them. Hence, we need to choose the factor in
4572 * front of the time step slightly smaller. (We later re-considered this
4573 * approach towards time stepping. If you're curious about this, you may
4574 * want to read the time stepping section in @cite HDGB17 .)
4578 * After temperature right hand side assembly, we solve the linear
4579 * system for temperature (with fully distributed vectors without
4580 * ghost elements and using the solution from the last timestep as
4581 * our initial guess for the iterative solver), apply constraints,
4582 * and copy the vector back to one with ghosts.
4586 * In the end, we extract the temperature range similarly to @ref step_31 "step-31" to
4587 * produce some output (for example in order to help us choose the
4588 * stabilization constants, as discussed in the introduction). The only
4589 * difference is that we need to exchange maxima over all processors.
4593 * TimerOutput::Scope timer_section(computing_timer,
4594 * " Assemble temperature rhs");
4596 * old_time_step = time_step;
4598 * const double scaling = (dim == 3 ? 0.25 : 1.0);
4599 * time_step = (scaling / (2.1 * dim * std::sqrt(1. * dim)) /
4600 * (parameters.temperature_degree * get_cfl_number()));
4602 * const double maximal_velocity = get_maximal_velocity();
4603 * pcout << " Maximal velocity: "
4604 * << maximal_velocity * EquationData::year_in_seconds * 100
4605 * << " cm/year" << std::endl;
4607 * << "Time step: " << time_step / EquationData::year_in_seconds
4608 * << " years" << std::endl;
4610 * assemble_temperature_system(maximal_velocity);
4614 * TimerOutput::Scope timer_section(computing_timer,
4615 * " Solve temperature system");
4617 * SolverControl solver_control(temperature_matrix.m(),
4618 * 1e-12 * temperature_rhs.l2_norm());
4619 * SolverCG<TrilinosWrappers::MPI::Vector> cg(solver_control);
4621 * TrilinosWrappers::MPI::Vector distributed_temperature_solution(
4623 * distributed_temperature_solution = old_temperature_solution;
4625 * cg.solve(temperature_matrix,
4626 * distributed_temperature_solution,
4628 * *T_preconditioner);
4630 * temperature_constraints.distribute(distributed_temperature_solution);
4631 * temperature_solution = distributed_temperature_solution;
4633 * pcout << " " << solver_control.last_step()
4634 * << " CG iterations for temperature" << std::endl;
4636 * double temperature[2] = {std::numeric_limits<double>::max(),
4637 * std::numeric_limits<double>::lowest()};
4638 * double global_temperature[2];
4640 * for (unsigned int i =
4641 * distributed_temperature_solution.local_range().first;
4642 * i < distributed_temperature_solution.local_range().second;
4646 * std::min<double>(temperature[0],
4647 * distributed_temperature_solution(i));
4649 * std::max<double>(temperature[1],
4650 * distributed_temperature_solution(i));
4653 * temperature[0] *= -1.0;
4654 * Utilities::MPI::max(temperature, MPI_COMM_WORLD, global_temperature);
4655 * global_temperature[0] *= -1.0;
4657 * pcout << " Temperature range: " << global_temperature[0] << ' '
4658 * << global_temperature[1] << std::endl;
4666 * <a name="step_32-BoussinesqFlowProblemoutput_results"></a>
4667 * <h4>BoussinesqFlowProblem::output_results</h4>
4671 * Next comes the function that generates the output. The quantities to
4672 * output could be introduced manually like we did in @ref step_31 "step-31". An
4673 * alternative is to hand this task over to a class Postprocessor that
4674 * inherits from the class DataPostprocessor, which can be attached to
4675 * DataOut. This allows us to output derived quantities from the solution,
4676 * like the friction heating included in this example. It overrides the
4677 * virtual function DataPostprocessor::evaluate_vector_field(),
4678 * which is then internally called from DataOut::build_patches(). We have to
4679 * give it values of the numerical solution, its derivatives, normals to the
4680 * cell, the actual evaluation points and any additional quantities. This
4681 * follows the same procedure as discussed in @ref step_29 "step-29" and other programs.
4684 * template <int dim>
4685 * class BoussinesqFlowProblem<dim>::Postprocessor
4686 * : public DataPostprocessor<dim>
4689 * Postprocessor(const unsigned int partition, const double minimal_pressure);
4691 * virtual void evaluate_vector_field(
4692 * const DataPostprocessorInputs::Vector<dim> &inputs,
4693 * std::vector<Vector<double>> &computed_quantities) const override;
4695 * virtual std::vector<std::string> get_names() const override;
4697 * virtual std::vector<
4698 * DataComponentInterpretation::DataComponentInterpretation>
4699 * get_data_component_interpretation() const override;
4701 * virtual UpdateFlags get_needed_update_flags() const override;
4704 * const unsigned int partition;
4705 * const double minimal_pressure;
4709 * template <int dim>
4710 * BoussinesqFlowProblem<dim>::Postprocessor::Postprocessor(
4711 * const unsigned int partition,
4712 * const double minimal_pressure)
4713 * : partition(partition)
4714 * , minimal_pressure(minimal_pressure)
4720 * Here we define the names for the variables we want to output. These are
4721 * the actual solution values for velocity, pressure, and temperature, as
4722 * well as the friction heating and, for each cell, the number of the processor
4723 * that owns it. This allows us to visualize the partitioning of the domain
4724 * among the processors. Except for the velocity, which is vector-valued,
4725 * all other quantities are scalar.
4728 * template <int dim>
4729 * std::vector<std::string>
4730 * BoussinesqFlowProblem<dim>::Postprocessor::get_names() const
4732 * std::vector<std::string> solution_names(dim, "velocity");
4733 * solution_names.emplace_back("p");
4734 * solution_names.emplace_back("T");
4735 * solution_names.emplace_back("friction_heating");
4736 * solution_names.emplace_back("partition");
4738 * return solution_names;
4742 * template <int dim>
4743 * std::vector<DataComponentInterpretation::DataComponentInterpretation>
4744 * BoussinesqFlowProblem<dim>::Postprocessor::get_data_component_interpretation()
4747 * std::vector<DataComponentInterpretation::DataComponentInterpretation>
4748 * interpretation(dim,
4749 * DataComponentInterpretation::component_is_part_of_vector);
4751 * interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4752 * interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4753 * interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4754 * interpretation.push_back(DataComponentInterpretation::component_is_scalar);
4756 * return interpretation;
4760 * template <int dim>
4762 * BoussinesqFlowProblem<dim>::Postprocessor::get_needed_update_flags() const
4764 * return update_values | update_gradients | update_quadrature_points;
4770 * Now we implement the function that computes the derived quantities. As we
4771 * also did for the output, we rescale the velocity from its SI units to
4772 * something more readable, namely cm/year. Next, the pressure is scaled to
4773 * be between 0 and the maximum pressure. This makes it more easily
4774 * comparable -- in essence making all pressure variables positive or
4775 * zero. Temperature is taken as is, and the friction heating is computed as
4776 * @f$2 \eta \varepsilon(\mathbf{u}) \cdot \varepsilon(\mathbf{u})@f$.
4780 * The quantities we output here are more for illustration, rather than for
4781 * actual scientific value. We come back to this briefly in the results
4782 * section of this program and explain what one may in fact be interested in.
4785 * template <int dim>
4786 * void BoussinesqFlowProblem<dim>::Postprocessor::evaluate_vector_field(
4787 * const DataPostprocessorInputs::Vector<dim> &inputs,
4788 * std::vector<Vector<double>> &computed_quantities) const
4790 * const unsigned int n_evaluation_points = inputs.solution_values.size();
4791 * Assert(inputs.solution_gradients.size() == n_evaluation_points,
4792 * ExcInternalError());
4793 * Assert(computed_quantities.size() == n_evaluation_points,
4794 * ExcInternalError());
4795 * Assert(inputs.solution_values[0].size() == dim + 2, ExcInternalError());
4797 * for (unsigned int p = 0; p < n_evaluation_points; ++p)
4799 * for (unsigned int d = 0; d < dim; ++d)
4800 * computed_quantities[p](d) = (inputs.solution_values[p](d) *
4801 * EquationData::year_in_seconds * 100);
4803 * const double pressure =
4804 * (inputs.solution_values[p](dim) - minimal_pressure);
4805 * computed_quantities[p](dim) = pressure;
4807 * const double temperature = inputs.solution_values[p](dim + 1);
4808 * computed_quantities[p](dim + 1) = temperature;
4810 * Tensor<2, dim> grad_u;
4811 * for (unsigned int d = 0; d < dim; ++d)
4812 * grad_u[d] = inputs.solution_gradients[p][d];
4813 * const SymmetricTensor<2, dim> strain_rate = symmetrize(grad_u);
4814 * computed_quantities[p](dim + 2) =
4815 * 2 * EquationData::eta * strain_rate * strain_rate;
4817 * computed_quantities[p](dim + 3) = partition;
4824 * The <code>output_results()</code> function has a similar task to the one
4825 * in @ref step_31 "step-31". However, here we are going to demonstrate a different
4826 * technique on how to merge output from different DoFHandler objects. The
4827 * way we're going to achieve this recombination is to create a joint
4828 * DoFHandler that collects both components, the Stokes solution and the
4829 * temperature solution. This can be nicely done by combining the finite
4830 * elements from the two systems to form one FESystem, and let this
4831 * collective system define a new DoFHandler object. To be sure that
4832 * everything was done correctly, we perform a sanity check that ensures
4833 * that we got all the dofs from both Stokes and temperature even in the
4834 * combined system. We then combine the data vectors. Unfortunately, there
4835 * is no straightforward relation that tells us how to sort Stokes and
4836 * temperature vector into the joint vector. The way we can get around this
4837 * trouble is to rely on the information collected in the FESystem. For each
4838 * dof on a cell, the joint finite element knows to which equation component
4839 * (velocity component, pressure, or temperature) it belongs -- that's the
4840 * information we need! So we step through all cells (with iterators into
4841 * all three DoFHandlers moving in sync), and for each joint cell dof, we
4842 * read out that component using the FiniteElement::system_to_base_index
4843 * function (see there for a description of what the various parts of its
4844 * return value contain). We also need to keep track of whether we're on a
4845 * Stokes dof or a temperature dof, which is contained in
4847 * data structures on either of the three systems tell us what the relation
4848 * between global vector and local dofs looks like on the present cell,
4849 * which concludes this tedious work. We make sure that each processor only
4850 * works on the subdomain it owns locally (and not on ghost or artificial
4851 * cells) when building the joint solution vector. The same will then have
4857 * What we end up with is a set of patches that we can write using the
4858 * functions in DataOutBase in a variety of output formats. Here, we then
4859 * have to pay attention that what each processor writes is really only its
4860 * own part of the domain, i.e. we will want to write each processor's
4861 * contribution into a separate file. This we do by adding an additional
4862 * number to the filename when we write the solution. This is not really
4863 * new, we did it similarly in @ref step_40 "step-40". Note that we write in the compressed
4864 * format @p .vtu instead of plain vtk files, which saves quite some
4869 * All the rest of the work is done in the Postprocessor class.
4872 * template <int dim>
4873 * void BoussinesqFlowProblem<dim>::output_results()
4875 * TimerOutput::Scope timer_section(computing_timer, "Postprocessing");
4877 * const FESystem<dim> joint_fe(stokes_fe, 1, temperature_fe, 1);
4879 * DoFHandler<dim> joint_dof_handler(triangulation);
4880 * joint_dof_handler.distribute_dofs(joint_fe);
4881 * Assert(joint_dof_handler.n_dofs() ==
4882 * stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
4883 * ExcInternalError());
4885 * TrilinosWrappers::MPI::Vector joint_solution;
4886 * joint_solution.reinit(joint_dof_handler.locally_owned_dofs(),
4890 * std::vector<types::global_dof_index> local_joint_dof_indices(
4891 * joint_fe.n_dofs_per_cell());
4892 * std::vector<types::global_dof_index> local_stokes_dof_indices(
4893 * stokes_fe.n_dofs_per_cell());
4894 * std::vector<types::global_dof_index> local_temperature_dof_indices(
4895 * temperature_fe.n_dofs_per_cell());
4897 * typename DoFHandler<dim>::active_cell_iterator
4898 * joint_cell = joint_dof_handler.begin_active(),
4899 * joint_endc = joint_dof_handler.end(),
4900 * stokes_cell = stokes_dof_handler.begin_active(),
4901 * temperature_cell = temperature_dof_handler.begin_active();
4902 * for (; joint_cell != joint_endc;
4903 * ++joint_cell, ++stokes_cell, ++temperature_cell)
4904 * if (joint_cell->is_locally_owned())
4906 * joint_cell->get_dof_indices(local_joint_dof_indices);
4907 * stokes_cell->get_dof_indices(local_stokes_dof_indices);
4908 * temperature_cell->get_dof_indices(local_temperature_dof_indices);
4910 * for (unsigned int i = 0; i < joint_fe.n_dofs_per_cell(); ++i)
4911 * if (joint_fe.system_to_base_index(i).first.first == 0)
4913 * Assert(joint_fe.system_to_base_index(i).second <
4914 * local_stokes_dof_indices.size(),
4915 * ExcInternalError());
4917 * joint_solution(local_joint_dof_indices[i]) = stokes_solution(
4918 * local_stokes_dof_indices[joint_fe.system_to_base_index(i)
4923 * Assert(joint_fe.system_to_base_index(i).first.first == 1,
4924 * ExcInternalError());
4925 * Assert(joint_fe.system_to_base_index(i).second <
4926 * local_temperature_dof_indices.size(),
4927 * ExcInternalError());
4928 * joint_solution(local_joint_dof_indices[i]) =
4929 * temperature_solution(
4930 * local_temperature_dof_indices
4931 * [joint_fe.system_to_base_index(i).second]);
4936 * joint_solution.compress(VectorOperation::insert);
4938 * const IndexSet locally_relevant_joint_dofs =
4939 * DoFTools::extract_locally_relevant_dofs(joint_dof_handler);
4940 * TrilinosWrappers::MPI::Vector locally_relevant_joint_solution;
4941 * locally_relevant_joint_solution.reinit(locally_relevant_joint_dofs,
4943 * locally_relevant_joint_solution = joint_solution;
4945 * Postprocessor postprocessor(Utilities::MPI::this_mpi_process(
4947 * stokes_solution.block(1).min());
4949 * DataOut<dim> data_out;
4950 * data_out.attach_dof_handler(joint_dof_handler);
4951 * data_out.add_data_vector(locally_relevant_joint_solution, postprocessor);
4952 * data_out.build_patches();
4954 * static int out_index = 0;
4955 * data_out.write_vtu_with_pvtu_record(
4956 * "./", "solution", out_index, MPI_COMM_WORLD, 5);
4966 * <a name="step_32-BoussinesqFlowProblemrefine_mesh"></a>
4967 * <h4>BoussinesqFlowProblem::refine_mesh</h4>
4971 * This function isn't really new either. Since the <code>setup_dofs</code>
4972 * function that we call in the middle has its own timer section, we split
4973 * timing this function into two sections. It will also allow us to easily
4974 * identify which of the two is more expensive.
4978 * One thing of note, however, is that we only want to compute error
4979 * indicators on the locally owned subdomain. In order to achieve this, we
4981 * function. Note that the vector for error estimates is resized to the
4982 * number of active cells present on the current process, which is less than
4983 * the total number of active cells on all processors (but more than the
4984 * number of locally owned active cells); each processor only has a few
4985 * coarse cells around the locally owned ones, as also explained in @ref step_40 "step-40".
4989 * The local error estimates are then handed to a %parallel version of
4991 * also @ref step_40 "step-40") which looks at the errors and finds the cells that need
4992 * refinement by comparing the error values across processors. As in
4993 * @ref step_31 "step-31", we want to limit the maximum grid level. So in case some cells
4994 * have been marked that are already at the finest level, we simply clear
4998 *
template <
int dim>
5000 * BoussinesqFlowProblem<dim>::refine_mesh(
const unsigned int max_grid_level)
5003 * temperature_dof_handler);
5005 * stokes_dof_handler);
5009 *
"Refine mesh structure, part 1");
5011 *
Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
5014 * temperature_dof_handler,
5017 * temperature_solution,
5018 * estimated_error_per_cell,
5022 * triangulation.locally_owned_subdomain());
5025 * triangulation, estimated_error_per_cell, 0.3, 0.1);
5027 *
if (triangulation.n_levels() > max_grid_level)
5029 * triangulation.begin_active(max_grid_level);
5030 * cell != triangulation.end();
5032 * cell->clear_refine_flag();
5036 * With all flags marked as necessary, we can then tell the
5038 * the next, which they will do when notified by
5039 * Triangulation as part of the @p execute_coarsening_and_refinement() call.
5040 * The syntax is similar to the non-%parallel solution transfer (with the
5041 * exception that here a pointer to the vector entries is enough). The
5042 * remainder of the function further down below is then concerned with
5043 * setting up the data structures again after mesh refinement and
5044 * restoring the solution vectors on the new mesh.
5048 * &temperature_solution, &old_temperature_solution};
5049 *
const std::vector<const TrilinosWrappers::MPI::BlockVector *> x_stokes = {
5050 * &stokes_solution, &old_stokes_solution};
5052 * triangulation.prepare_coarsening_and_refinement();
5054 * temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
5055 * stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
5057 * triangulation.execute_coarsening_and_refinement();
5064 *
"Refine mesh structure, part 2");
5070 * std::vector<TrilinosWrappers::MPI::Vector *> tmp = {&distributed_temp1,
5071 * &distributed_temp2};
5072 * temperature_trans.interpolate(tmp);
5076 * enforce constraints to make the interpolated solution conforming on
5080 * temperature_constraints.distribute(distributed_temp1);
5081 * temperature_constraints.distribute(distributed_temp2);
5083 * temperature_solution = distributed_temp1;
5084 * old_temperature_solution = distributed_temp2;
5091 * std::vector<TrilinosWrappers::MPI::BlockVector *> stokes_tmp = {
5092 * &distributed_stokes, &old_distributed_stokes};
5094 * stokes_trans.interpolate(stokes_tmp);
5098 * enforce constraints to make the interpolated solution conforming on
5102 * stokes_constraints.distribute(distributed_stokes);
5103 * stokes_constraints.distribute(old_distributed_stokes);
5105 * stokes_solution = distributed_stokes;
5106 * old_stokes_solution = old_distributed_stokes;
5116 * <a name="step_32-BoussinesqFlowProblemrun"></a>
5117 * <h4>BoussinesqFlowProblem::run</h4>
5121 * This is the final and controlling function in this class. It, in fact,
5122 * runs the entire rest of the program and is, once more, very similar to
5123 * @ref step_31 "step-31". The only substantial difference is that we use a different mesh
5127 *
template <
int dim>
5128 *
void BoussinesqFlowProblem<dim>::run()
5134 * (dim == 3) ? 96 : 12,
5139 * triangulation.refine_global(parameters.initial_global_refinement);
5143 *
unsigned int pre_refinement_step = 0;
5145 * start_time_iteration:
5149 * temperature_dof_handler.locally_owned_dofs());
5153 * standard finite elements via deal.II's own native MatrixFree framework:
5154 * since we use standard Lagrange elements of moderate order this function
5158 * VectorTools::project(temperature_dof_handler,
5159 * temperature_constraints,
5160 * QGauss<dim>(parameters.temperature_degree + 2),
5161 * EquationData::TemperatureInitialValues<dim>(),
5165 * Having so computed the current temperature field, let us set the member
5166 * variable that holds the temperature nodes. Strictly speaking, we really
5167 * only need to set <code>old_temperature_solution</code> since the first
5168 * thing we will do is to compute the Stokes solution that only requires
5169 * the previous time step's temperature field. That said, nothing good can
5170 * come from not initializing the other vectors as well (especially since
5171 * it's a relatively cheap operation and we only have to do it once at the
5172 * beginning of the program) if we ever want to extend our numerical
5173 * method or physical model, and so we initialize
5174 * <code>old_temperature_solution</code> and
5175 * <code>old_old_temperature_solution</code> as well. The assignment makes
5176 * sure that the vectors on the left hand side (which were initialized to
5177 * contain ghost elements as well) also get the correct ghost elements. In
5178 * other words, the assignment here requires communication between
5182 * temperature_solution = solution;
5183 * old_temperature_solution = solution;
5184 * old_old_temperature_solution = solution;
5187 * timestep_number = 0;
5188 * time_step = old_time_step = 0;
5194 * pcout << "Timestep " << timestep_number
5195 * << ": t=" << time / EquationData::year_in_seconds << " years"
5198 * assemble_stokes_system();
5199 * build_stokes_preconditioner();
5200 * assemble_temperature_matrix();
5204 * pcout << std::endl;
5206 * if ((timestep_number == 0) &&
5207 * (pre_refinement_step < parameters.initial_adaptive_refinement))
5209 * refine_mesh(parameters.initial_global_refinement +
5210 * parameters.initial_adaptive_refinement);
5211 * ++pre_refinement_step;
5212 * goto start_time_iteration;
5214 * else if ((timestep_number > 0) &&
5215 * (timestep_number % parameters.adaptive_refinement_interval ==
5217 * refine_mesh(parameters.initial_global_refinement +
5218 * parameters.initial_adaptive_refinement);
5220 * if ((parameters.generate_graphical_output == true) &&
5221 * (timestep_number % parameters.graphical_output_interval == 0))
5226 * In order to speed up linear solvers, we extrapolate the solutions
5227 * from the old time levels to the new one. This gives a very good
5228 * initial guess, cutting the number of iterations needed in solvers
5229 * by more than one half. We do not need to extrapolate in the last
5230 * iteration, so if we reached the final time, we stop here.
5234 * As the last thing during a time step (before actually bumping up
5235 * the number of the time step), we check whether the current time
5236 * step number is divisible by 100, and if so we let the computing
5237 * timer print a summary of CPU times spent so far.
5240 * if (time > parameters.end_time * EquationData::year_in_seconds)
5243 * TrilinosWrappers::MPI::BlockVector old_old_stokes_solution;
5244 * old_old_stokes_solution = old_stokes_solution;
5245 * old_stokes_solution = stokes_solution;
5246 * old_old_temperature_solution = old_temperature_solution;
5247 * old_temperature_solution = temperature_solution;
5248 * if (old_time_step > 0)
5252 * Trilinos sadd does not like ghost vectors even as input. Copy
5253 * into distributed vectors for now:
5257 * TrilinosWrappers::MPI::BlockVector distr_solution(stokes_rhs);
5258 * distr_solution = stokes_solution;
5259 * TrilinosWrappers::MPI::BlockVector distr_old_solution(stokes_rhs);
5260 * distr_old_solution = old_old_stokes_solution;
5261 * distr_solution.sadd(1. + time_step / old_time_step,
5262 * -time_step / old_time_step,
5263 * distr_old_solution);
5264 * stokes_solution = distr_solution;
5267 * TrilinosWrappers::MPI::Vector distr_solution(temperature_rhs);
5268 * distr_solution = temperature_solution;
5269 * TrilinosWrappers::MPI::Vector distr_old_solution(temperature_rhs);
5270 * distr_old_solution = old_old_temperature_solution;
5271 * distr_solution.sadd(1. + time_step / old_time_step,
5272 * -time_step / old_time_step,
5273 * distr_old_solution);
5274 * temperature_solution = distr_solution;
5278 * if ((timestep_number > 0) && (timestep_number % 100 == 0))
5279 * computing_timer.print_summary();
5281 * time += time_step;
5282 * ++timestep_number;
5288 * If we are generating graphical output, do so also for the last time
5289 * step unless we had just done so before we left the do-while loop
5292 * if ((parameters.generate_graphical_output == true) &&
5293 * !((timestep_number - 1) % parameters.graphical_output_interval == 0))
5296 * } // namespace Step32
5303 * <a name="step_32-Thecodemaincodefunction"></a>
5304 * <h3>The <code>main</code> function</h3>
5308 * The main function is short as usual and very similar to the one in
5309 * @ref step_31 "step-31". Since we use a parameter file which is specified as an argument in
5310 * the command line, we have to read it in here and pass it on to the
5311 * Parameters class for parsing. If no filename is given in the command line,
5312 * we simply use the <code>step-32.prm</code> file which is distributed
5313 * together with the program.
5317 * Because 3d computations are simply very slow unless you throw a lot of
5318 * processors at them, the program defaults to 2d. You can get the 3d version
5319 * by changing the constant dimension below to 3.
5322 * int main(int argc, char *argv[])
5326 * using namespace Step32;
5327 * using namespace dealii;
5329 * Utilities::MPI::MPI_InitFinalize mpi_initialization(
5330 * argc, argv, numbers::invalid_unsigned_int);
5332 * std::string parameter_filename;
5334 * parameter_filename = argv[1];
5336 * parameter_filename = "step-32.prm";
5338 * const int dim = 2;
5339 * BoussinesqFlowProblem<dim>::Parameters parameters(parameter_filename);
5340 * BoussinesqFlowProblem<dim> flow_problem(parameters);
5341 * flow_problem.run();
5343 * catch (std::exception &exc)
5345 * std::cerr << std::endl
5347 * << "----------------------------------------------------"
5349 * std::cerr << "Exception on processing: " << std::endl
5350 * << exc.what() << std::endl
5351 * << "Aborting!" << std::endl
5352 * << "----------------------------------------------------"
5359 * std::cerr << std::endl
5361 * << "----------------------------------------------------"
5363 * std::cerr << "Unknown exception!" << std::endl
5364 * << "Aborting!" << std::endl
5365 * << "----------------------------------------------------"
5373<a name="step_32-Results"></a><h1>Results</h1>
5376When run, the program simulates convection in 3d in much the same way
5377as @ref step_31 "step-31" did, though with an entirely different testcase.
5380<a name="step_32-Comparisonofresultswithstep31"></a><h3>Comparison of results with step-31</h3>
5383Before we go to this testcase, however, let us show a few results from a
5384slightly earlier version of this program that was solving exactly the
5385testcase we used in @ref step_31 "step-31", just that we now solve it in parallel and with
5386much higher resolution. We show these results mainly for comparison.
5388Here are two images that show this higher resolution if we choose a 3d
5389computation in <code>main()</code> and if we set
5390<code>initial_refinement=3</code> and
5391<code>n_pre_refinement_steps=4</code>. At the time steps shown, the
5392meshes had around 72,000 and 236,000 cells, for a total of 2,680,000
5393and 8,250,000 degrees of freedom, respectively, more than an order of
5394magnitude more than we had available in @ref step_31 "step-31":
5396<table align="center" class="doxtable">
5399 <img src="https://www.dealii.org/images/steps/developer/step-32.3d.cube.0.png" alt="">
5404 <img src="https://www.dealii.org/images/steps/developer/step-32.3d.cube.1.png" alt="">
5409The computation was done on a subset of 50 processors of the Brazos
5410cluster at Texas A&M University.
5413<a name="step_32-Resultsfora2dcircularshelltestcase"></a><h3>Results for a 2d circular shell testcase</h3>
5416Next, we will run @ref step_32 "step-32" with the parameter file in the directory with one
5417change: we increase the final time to 1e9. Here we are using 16 processors. The
5418command to launch is (note that <code>step-32.prm</code> is the default):
5422\$ mpirun -np 16 ./step-32
5426Note that running a job on a cluster typically requires going through a job
5427scheduler, which we won't discuss here. The output will look roughly like
5432\$ mpirun -np 16 ./step-32
5433Number of active cells: 12,288 (on 6 levels)
5434Number of degrees of freedom: 186,624 (99,840+36,864+49,920)
5436Timestep 0: t=0 years
5438 Rebuilding Stokes preconditioner...
5439 Solving Stokes system... 41 iterations.
5440 Maximal velocity: 60.4935 cm/year
5441 Time step: 18166.9 years
5442 17 CG iterations for temperature
5443 Temperature range: 973 4273.16
5445Number of active cells: 15,921 (on 7 levels)
5446Number of degrees of freedom: 252,723 (136,640+47,763+68,320)
5448Timestep 0: t=0 years
5450 Rebuilding Stokes preconditioner...
5451 Solving Stokes system... 50 iterations.
5452 Maximal velocity: 60.3223 cm/year
5453 Time step: 10557.6 years
5454 19 CG iterations for temperature
5455 Temperature range: 973 4273.16
5457Number of active cells: 19,926 (on 8 levels)
5458Number of degrees of freedom: 321,246 (174,312+59,778+87,156)
5460Timestep 0: t=0 years
5462 Rebuilding Stokes preconditioner...
5463 Solving Stokes system... 50 iterations.
5464 Maximal velocity: 57.8396 cm/year
5465 Time step: 5453.78 years
5466 18 CG iterations for temperature
5467 Temperature range: 973 4273.16
5469Timestep 1: t=5453.78 years
5471 Solving Stokes system... 49 iterations.
5472 Maximal velocity: 59.0231 cm/year
5473 Time step: 5345.86 years
5474 18 CG iterations for temperature
5475 Temperature range: 973 4273.16
5477Timestep 2: t=10799.6 years
5479 Solving Stokes system... 24 iterations.
5480 Maximal velocity: 60.2139 cm/year
5481 Time step: 5241.51 years
5482 17 CG iterations for temperature
5483 Temperature range: 973 4273.16
5487Timestep 100: t=272151 years
5489 Solving Stokes system... 21 iterations.
5490 Maximal velocity: 161.546 cm/year
5491 Time step: 1672.96 years
5492 17 CG iterations for temperature
5493 Temperature range: 973 4282.57
5495Number of active cells: 56,085 (on 8 levels)
5496Number of degrees of freedom: 903,408 (490,102+168,255+245,051)
5500+---------------------------------------------+------------+------------+
5501| Total wallclock time elapsed since start | 115s | |
5503| Section | no. calls | wall time | % of total |
5504+---------------------------------+-----------+------------+------------+
5505| Assemble Stokes system | 103 | 2.82s | 2.5% |
5506| Assemble temperature matrices | 12 | 0.452s | 0.39% |
5507| Assemble temperature rhs | 103 | 11.5s | 10% |
5508| Build Stokes preconditioner | 12 | 2.09s | 1.8% |
5509| Solve Stokes system | 103 | 90.4s | 79% |
5510| Solve temperature system | 103 | 1.53s | 1.3% |
5511| Postprocessing | 3 | 0.532s | 0.46% |
5512| Refine mesh structure, part 1 | 12 | 0.93s | 0.81% |
5513| Refine mesh structure, part 2 | 12 | 0.384s | 0.33% |
5514| Setup dof systems | 13 | 2.96s | 2.6% |
5515+---------------------------------+-----------+------------+------------+
5519+---------------------------------------------+------------+------------+
5520| Total wallclock time elapsed since start | 9.14e+04s | |
5522| Section | no. calls | wall time | % of total |
5523+---------------------------------+-----------+------------+------------+
5524| Assemble Stokes system | 47045 | 2.05e+03s | 2.2% |
5525| Assemble temperature matrices | 4707 | 310s | 0.34% |
5526| Assemble temperature rhs | 47045 | 8.7e+03s | 9.5% |
5527| Build Stokes preconditioner | 4707 | 1.48e+03s | 1.6% |
5528| Solve Stokes system | 47045 | 7.34e+04s | 80% |
5529| Solve temperature system | 47045 | 1.46e+03s | 1.6% |
5530| Postprocessing | 1883 | 222s | 0.24% |
5531| Refine mesh structure, part 1 | 4706 | 641s | 0.7% |
5532| Refine mesh structure, part 2 | 4706 | 259s | 0.28% |
5533| Setup dof systems | 4707 | 1.86e+03s | 2% |
5534+---------------------------------+-----------+------------+------------+
5538The simulation terminates when the time reaches the 1 billion years
5539selected in the input file. You can extrapolate from this how long a
5540simulation would take for a different final time (the time step size
5541ultimately settles on somewhere around 20,000 years, so computing for
5542two billion years will take 100,000 time steps, give or take 20%). As
5543can be seen here, we spend most of the compute time in assembling
5544linear systems and — above all — in solving Stokes systems.
5548To demonstrate the output we show the output from every 1250th time step here:
5552 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-000.png" alt="">
5555 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-050.png" alt="">
5558 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-100.png" alt="">
5563 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-150.png" alt="">
5566 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-200.png" alt="">
5569 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-250.png" alt="">
5574 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-300.png" alt="">
5577 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-350.png" alt="">
5580 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-400.png" alt="">
5585 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-450.png" alt="">
5588 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-500.png" alt="">
5591 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-550.png" alt="">
5596 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-time-600.png" alt="">
5599 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-cells.png" alt="">
5602 <img src="https://www.dealii.org/images/steps/developer/step-32-2d-partition.png" alt="">
5607The last two images show the grid as well as the partitioning of the mesh for
5608the same computation with 16 subdomains and 16 processors. The full dynamics of
5609this simulation are really only visible by looking at an animation, for example
5611<a href="https://www.dealii.org/images/steps/developer/step-32-2d-temperature.webm">shown
5612on this site</a>. This animation is well worth watching due to its artistic quality
5613and entrancing depiction of the evolution of the magma plumes.
5615If you watch the movie, you'll see that the convection pattern goes
5616through several stages: First, it gets rid of the unstable temperature
5617layering with the hot material overlain by the dense cold
5618material. After this great driver is removed and we have a sort of
5619stable situation, a few blobs start to separate from the hot boundary
5620layer at the inner ring and rise up, with a few cold fingers also
5621dropping down from the outer boundary layer. During this phase, the solution
5622remains mostly symmetric, reflecting the 12-fold symmetry of the
5623original mesh. In a final phase, the fluid enters vigorous chaotic
5624stirring in which all symmetries are lost. This is a pattern that then
5625continues to dominate the flow.
5627These different phases can also be identified if we look at the
5628maximal velocity as a function of time in the simulation:
5630<img src="https://www.dealii.org/images/steps/developer/step-32.2d.t_vs_vmax.png" alt="">
5632Here, the velocity (shown in centimeters per year) becomes very large
5633(to the order of several meters per year) at the beginning when the
5634temperature layering is unstable. It then calms down to relatively
5635small values before picking up again in the chaotic stirring
5636regime. There, it remains in the range of 10-40 centimeters per year,
5637quite within the physically expected region.
5640<a name="step_32-Resultsfora3dsphericalshelltestcase"></a><h3>Results for a 3d spherical shell testcase</h3>
56433d computations are very expensive computationally. Furthermore, as
5644seen above, interesting behavior only starts after quite a long time
5645requiring more CPU hours than are available on a typical
5646cluster. Consequently, rather than showing a complete simulation here,
5647let us simply show a couple of pictures we have obtained using the
5648successor to this program, called <i>ASPECT</i> (short for <i>Advanced
5649%Solver for Problems in Earth's ConvecTion</i>), that is being
5650developed independently of deal.II and that already incorporates some
5651of the extensions discussed below. The following two pictures show
5652isocontours of the temperature and the partition of the domain (along
5653with the mesh) onto 512 processors:
5656<img src="https://www.dealii.org/images/steps/developer/step-32.3d-sphere.solution.png" alt="">
5658<img src="https://www.dealii.org/images/steps/developer/step-32.3d-sphere.partition.png" alt="">
5662<a name="step-32-extensions"></a>
5663<a name="step_32-Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
5666There are many directions in which this program could be extended. As
5667mentioned at the end of the introduction, most of these are under active
5668development in the <i>ASPECT</i> (short for <i>Advanced %Solver for Problems
5669in Earth's ConvecTion</i>) code at the time this tutorial program is being
5670finished. Specifically, the following are certainly topics that one should
5671address to make the program more useful:
5674 <li> <b>Adiabatic heating/cooling:</b>
5675 The temperature field we get in our simulations after a while
5676 is mostly constant with boundary layers at the inner and outer
5677 boundary, and streamers of cold and hot material mixing
5678 everything. Yet, this doesn't match our expectation that things
5679 closer to the earth core should be hotter than closer to the
5680 surface. The reason is that the energy equation we have used does
5681 not include a term that describes adiabatic cooling and heating:
5682 rock, like gas, heats up as you compress it. Consequently, material
5683 that rises up cools adiabatically, and cold material that sinks down
5684 heats adiabatically. The correct temperature equation would
5685 therefore look somewhat like this:
 @f{eqnarray*}{
5689 \frac{D T}{Dt} - \nabla \cdot \kappa \nabla T &=& \gamma + \tau\frac{Dp}{Dt},
 @f}
5691 or, expanding the advected derivative @f$\frac{D}{Dt} =
5692 \frac{\partial}{\partial t} + \mathbf u \cdot \nabla@f$:
 @f{eqnarray*}{
5694 \frac{\partial T}{\partial t}
5696 + {\mathbf u} \cdot \nabla T
5698 - \nabla \cdot \kappa \nabla T &=& \gamma +
5699 \tau\left\{\frac{\partial
5700 p}{\partial t} + \mathbf u \cdot \nabla p \right\}.
 @f}
5702 In other words, as pressure increases in a rock volume
5703 (@f$\frac{Dp}{Dt}>0@f$) we get an additional heat source, and vice versa.
5706 The time derivative of the pressure is a bit awkward to
5707 implement. If necessary, one could approximate it using the fact
5708 outlined in the introduction that the pressure can be decomposed
5709 into a dynamic component due to temperature differences and the
5710 resulting flow, and a static component that results solely from the
5711 static pressure of the overlying rock. Since the latter is much
5712 bigger, one may approximate @f$p\approx p_{\text{static}}=-\rho_{\text{ref}}
5713 [1+\beta T_{\text{ref}}] \varphi@f$, and consequently
5714 @f$\frac{Dp}{Dt} \approx \left\{- \mathbf u \cdot \nabla \rho_{\text{ref}}
5715 [1+\beta T_{\text{ref}}]\varphi\right\} = \rho_{\text{ref}}
5716 [1+\beta T_{\text{ref}}] \mathbf u \cdot \mathbf g@f$.
5717 In other words, if the fluid is moving in the direction of gravity
5718 (downward) it will be compressed and because in that case @f$\mathbf u
5719 \cdot \mathbf g > 0@f$ we get a positive heat source. Conversely, the
5720 fluid will cool down if it moves against the direction of gravity.
5722<li> <b>Compressibility:</b>
5723 As already hinted at in the temperature model above,
5724 mantle rocks are not incompressible. Rather, given the enormous pressures in
5725 the earth mantle (at the core-mantle boundary, the pressure is approximately
5726 140 GPa, equivalent to 1,400,000 times atmospheric pressure), rock actually
5727 does compress to something around 1.5 times the density it would have
5728 at surface pressure. Modeling this presents any number of
5729 difficulties. Primarily, the mass conservation equation is no longer
5730 @f$\textrm{div}\;\mathbf u=0@f$ but should read
5731 @f$\textrm{div}(\rho\mathbf u)=0@f$ where the density @f$\rho@f$ is now no longer
5732 spatially constant but depends on temperature and pressure. A consequence is
5733 that the model is now no longer linear; a linearized version of the Stokes
5734 equation is also no longer symmetric, requiring us to rethink preconditioners
5735 and, possibly, even the discretization. We won't go into detail here as to
5736 how this can be resolved.
5738<li> <b>Nonlinear material models:</b> As already hinted at in various places,
5739 material parameters such as the density, the viscosity, and the various
5740 thermal parameters are not constant throughout the earth mantle. Rather,
5741 they nonlinearly depend on the pressure and temperature, and in the case of
5742 the viscosity on the strain rate @f$\varepsilon(\mathbf u)@f$. For complicated
5743 models, the only way to solve such models accurately may be to actually
5744 iterate this dependence out in each time step, rather than simply freezing
5745 coefficients at values extrapolated from the previous time step(s).
5747<li> <b>Checkpoint/restart:</b> Running this program in 2d on a number of
5748 processors allows solving realistic models in a day or two. However, in 3d,
5749 compute times are so large that one runs into two typical problems: (i) On
5750 most compute clusters, the queuing system limits run times for individual
5751 jobs to 2 or 3 days; (ii) losing the results of a computation due to
5752 hardware failures, misconfigurations, or power outages is a shame when
5753 running on hundreds of processors for a couple of days. Both of these
5754 problems can be addressed by periodically saving the state of the program
5755 and, if necessary, restarting the program at this point. This technique is
5756 commonly called <i>checkpoint/restart</i> and it requires that the entire
5757 state of the program is written to a permanent storage location (e.g. a hard
5758 drive). Given the complexity of the data structures of this program, this is
5759 not entirely trivial (it may also involve writing gigabytes or more of
5760 data), but it can be made easier by realizing that one can save the state
5761 between two time steps where it essentially only consists of the mesh and
5762 solution vectors; during restart one would then first re-enumerate degrees
5763 of freedom in the same way as done before and then re-assemble
5764 matrices. Nevertheless, given the distributed nature of the data structures
5765 involved here, saving and restoring the state of a program is not
5766 trivial. An additional complexity is introduced by the fact that one may
5767 want to change the number of processors between runs, for example because
5768 one may wish to continue computing on a mesh that is finer than the one used
5769 to precompute a starting temperature field at an intermediate time.
5771<li> <b>Predictive postprocessing:</b> The point of computations like this is
5772 not simply to solve the equations. Rather, it is typically the exploration
5773 of different physical models and their comparison with things that we can
5774 measure at the earth surface, in order to find which models are realistic
5775 and which are contradicted by reality. To this end, we need to compute
5776 quantities from our solution vectors that are related to what we can
5777 observe. Among these are, for example, heat fluxes at the surface of the
5778 earth, as well as seismic velocities throughout the mantle as these affect
5779 earthquake waves that are recorded by seismographs.
5781<li> <b>Better refinement criteria:</b> As can be seen above for the
57823d case, the mesh in 3d is primarily refined along the inner
5783boundary. This is because the boundary layer there is stronger than
5784any other transition in the domain, leading us to refine there almost
5785exclusively and basically not at all following the plumes. One
5786certainly needs better refinement criteria to track the parts of the
5787solution we are really interested in better than the criterion used
5788here, namely the KellyErrorEstimator applied to the temperature, is
5793There are many other ways to extend the current program. However, rather than
5794discussing them here, let us point to the much larger open
5795source code ASPECT (see https://aspect.geodynamics.org/ ) that constitutes the
5796further development of @ref step_32 "step-32" and that already includes many such possible
5800<a name="step_32-PlainProg"></a>
5801<h1> The plain program</h1>
5802@include "step-32.cc"
virtual void build_patches(const unsigned int n_subdivisions=0)
void reinit(const Triangulation< dim, spacedim > &tria)
active_cell_iterator begin_active(const unsigned int level=0) const
std::pair< std::pair< unsigned int, unsigned int >, unsigned int > system_to_base_index(const unsigned int index) const
virtual void vector_value(const Point< dim > &p, Vector< RangeNumberType > &values) const
static void estimate(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const Quadrature< dim - 1 > &quadrature, const std::map< types::boundary_id, const Function< spacedim, Number > * > &neumann_bc, const ReadVector< Number > &solution, Vector< float > &error, const ComponentMask &component_mask={}, const Function< spacedim > *coefficients=nullptr, const unsigned int n_threads=numbers::invalid_unsigned_int, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id, const types::material_id material_id=numbers::invalid_material_id, const Strategy strategy=cell_diameter_over_24)
@ smoothing_on_refinement
@ smoothing_on_coarsening
#define Assert(cond, exc)
#define AssertThrow(cond, exc)
TriaActiveIterator< CellAccessor< dim, spacedim > > active_cell_iterator
typename ActiveSelector::active_cell_iterator active_cell_iterator
void loop(IteratorType begin, std_cxx20::type_identity_t< IteratorType > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, AssemblerType &assembler, const LoopControl &lctrl=LoopControl())
const bool IsBlockVector< VectorType >::value
@ update_values
Shape function values.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
MappingQ< dim, spacedim > StaticMappingQ1< dim, spacedim >::mapping
std::vector< value_type > split(const typename ::Triangulation< dim, spacedim >::cell_iterator &parent, const value_type parent_value)
void approximate(const SynchronousIterators< std::tuple< typename DoFHandler< dim, spacedim >::active_cell_iterator, Vector< float >::iterator > > &cell, const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof_handler, const InputVector &solution, const unsigned int component)
Expression sign(const Expression &x)
void hyper_shell(Triangulation< dim, spacedim > &tria, const Point< spacedim > &center, const double inner_radius, const double outer_radius, const unsigned int n_cells=0, bool colorize=false)
void refine(Triangulation< dim, spacedim > &tria, const Vector< Number > &criteria, const double threshold, const unsigned int max_to_mark=numbers::invalid_unsigned_int)
@ valid
Iterator points to a valid object.
@ matrix
Contents is actually a matrix.
@ symmetric
Matrix is symmetric.
constexpr types::blas_int zero
constexpr types::blas_int one
double norm(const FEValuesBase< dim > &fe, const ArrayView< const std::vector< Tensor< 1, dim > > > &Du)
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > b(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
VectorType::value_type * end(VectorType &V)
std::vector< unsigned int > serial(const std::vector< unsigned int > &targets, const std::function< RequestType(const unsigned int)> &create_request, const std::function< AnswerType(const unsigned int, const RequestType &)> &answer_request, const std::function< void(const unsigned int, const AnswerType &)> &process_answer, const MPI_Comm comm)
T sum(const T &t, const MPI_Comm mpi_communicator)
T max(const T &t, const MPI_Comm mpi_communicator)
unsigned int this_mpi_process(const MPI_Comm mpi_communicator)
std::string compress(const std::string &input)
void run(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
void run(const std::vector< std::vector< Iterator > > &colored_iterators, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length=2 *MultithreadInfo::n_threads(), const unsigned int chunk_size=8)
void abort(const ExceptionBase &exc) noexcept
bool check(const ConstraintKinds kind_in, const unsigned int dim)
long double gamma(const unsigned int n)
int(& functions)(const void *v1, const void *v2)
void refine_and_coarsen_fixed_fraction(::Triangulation< dim, spacedim > &tria, const ::Vector< Number > &criteria, const double top_fraction_of_error, const double bottom_fraction_of_error, const VectorTools::NormType norm_type=VectorTools::L1_norm)
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > cos(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sin(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
unsigned int subdomain_id
std::vector< std::vector< bool > > constant_modes
constexpr ProductType< Number, OtherNumber >::type scalar_product(const Tensor< rank, dim, Number > &left, const Tensor< rank, dim, OtherNumber > &right)