deal.II version 9.7.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
step-51.h
Go to the documentation of this file.
1 = 0) const override
595 *   {
596 *   double sum = 0;
597 *   for (unsigned int i = 0; i < this->n_source_centers; ++i)
598 *   {
599 *   const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
600 *   sum +=
601 *   std::exp(-x_minus_xi.norm_square() / (this->width * this->width));
602 *   }
603 *  
604 *   return sum /
605 *   std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
606 *   }
607 *  
608 *   virtual Tensor<1, dim>
609 *   gradient(const Point<dim> &p,
610 *   const unsigned int /*component*/ = 0) const override
611 *   {
612 *   Tensor<1, dim> sum;
613 *   for (unsigned int i = 0; i < this->n_source_centers; ++i)
614 *   {
615 *   const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
616 *  
617 *   sum +=
618 *   (-2 / (this->width * this->width) *
619 *   std::exp(-x_minus_xi.norm_square() / (this->width * this->width)) *
620 *   x_minus_xi);
621 *   }
622 *  
623 *   return sum /
624 *   std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
625 *   }
626 *   };
627 *  
628 *  
629 *  
630 * @endcode
631 *
632 * This class implements a function where the scalar solution and its negative
633 * gradient are collected together. This function is used when computing the
634 * error of the HDG approximation and its implementation is to simply call
635 * value and gradient function of the Solution class.
636 *
637 * @code
638 *   template <int dim>
639 *   class SolutionAndGradient : public Function<dim>, protected SolutionBase<dim>
640 *   {
641 *   public:
642 *   SolutionAndGradient()
643 *   : Function<dim>(dim + 1)
644 *   {}
645 *  
646 *   virtual void vector_value(const Point<dim> &p,
647 *   Vector<double> &v) const override
648 *   {
649 *   AssertDimension(v.size(), dim + 1);
650 *   Solution<dim> solution;
651 *   Tensor<1, dim> grad = solution.gradient(p);
652 *   for (unsigned int d = 0; d < dim; ++d)
653 *   v[d] = -grad[d];
654 *   v[dim] = solution.value(p);
655 *   }
656 *   };
657 *  
658 *  
659 *  
660 * @endcode
661 *
662 * Next comes the implementation of the convection velocity. As described in
663 * the introduction, we choose a velocity field that is @f$(y, -x)@f$ in 2d and
664 * @f$(y, -x, 1)@f$ in 3d. This gives a divergence-free velocity field.
665 *
666 * @code
667 *   template <int dim>
668 *   class ConvectionVelocity : public TensorFunction<1, dim>
669 *   {
670 *   public:
671 *   ConvectionVelocity()
672 *   : TensorFunction<1, dim>()
673 *   {}
674 *  
675 *   virtual Tensor<1, dim> value(const Point<dim> &p) const override
676 *   {
677 *   Tensor<1, dim> convection;
678 *   switch (dim)
679 *   {
680 *   case 1:
681 *   convection[0] = 1;
682 *   break;
683 *   case 2:
684 *   convection[0] = p[1];
685 *   convection[1] = -p[0];
686 *   break;
687 *   case 3:
688 *   convection[0] = p[1];
689 *   convection[1] = -p[0];
690 *   convection[2] = 1;
691 *   break;
692 *   default:
693 *   DEAL_II_ASSERT_UNREACHABLE();
694 *   }
695 *   return convection;
696 *   }
697 *   };
698 *  
699 *  
700 *  
701 * @endcode
702 *
703 * The last function we implement is the right hand side for the
704 * manufactured solution. It is very similar to @ref step_7 "step-7", with the exception
705 * that we now have a convection term instead of the reaction term. Since
706 * the velocity field is incompressible, i.e., @f$\nabla \cdot \mathbf{c} =
707 * 0@f$, the advection term simply reads @f$\mathbf{c} \nabla u@f$.
708 *
709 * @code
710 *   template <int dim>
711 *   class RightHandSide : public Function<dim>, protected SolutionBase<dim>
712 *   {
713 *   public:
714 *   virtual double value(const Point<dim> &p,
715 *   const unsigned int /*component*/ = 0) const override
716 *   {
717 *   ConvectionVelocity<dim> convection_velocity;
718 *   Tensor<1, dim> convection = convection_velocity.value(p);
719 *   double sum = 0;
720 *   for (unsigned int i = 0; i < this->n_source_centers; ++i)
721 *   {
722 *   const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
723 *  
724 *   sum +=
725 *   ((2 * dim - 2 * convection * x_minus_xi -
726 *   4 * x_minus_xi.norm_square() / (this->width * this->width)) /
727 *   (this->width * this->width) *
728 *   std::exp(-x_minus_xi.norm_square() / (this->width * this->width)));
729 *   }
730 *  
731 *   return sum /
732 *   std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
733 *   }
734 *   };
735 *  
736 *  
737 *  
738 * @endcode
739 *
740 *
741 * <a name="step_51-TheHDGsolverclass"></a>
742 * <h3>The HDG solver class</h3>
743 *
744
745 *
746 * The HDG solution procedure follows closely that of @ref step_7 "step-7". The major
747 * difference is the use of three different sets of DoFHandler and FE
748 * objects, along with the ChunkSparseMatrix and the corresponding solutions
749 * vectors. We also use WorkStream to enable a multithreaded local solution
750 * process which exploits the embarrassingly parallel nature of the local
751 * solver. For WorkStream, we define the local operations on a cell and a
752 * copy function into the global matrix and vector. We do this both for the
753 * assembly (which is run twice, once when we generate the system matrix and
754 * once when we compute the element-interior solutions from the skeleton
755 * values) and for the postprocessing where we extract a solution that
756 * converges at higher order.
757 *
758 * @code
759 *   template <int dim>
760 *   class HDG
761 *   {
762 *   public:
763 *   enum RefinementMode
764 *   {
765 *   global_refinement,
766 *   adaptive_refinement
767 *   };
768 *  
769 *   HDG(const unsigned int degree, const RefinementMode refinement_mode);
770 *   void run();
771 *  
772 *   private:
773 *   void setup_system();
774 *   void assemble_system(const bool reconstruct_trace = false);
775 *   void solve();
776 *   void postprocess();
777 *   void refine_grid(const unsigned int cycle);
778 *   void output_results(const unsigned int cycle);
779 *  
780 * @endcode
781 *
782 * Data for the assembly and solution of the primal variables.
783 *
784 * @code
785 *   struct PerTaskData;
786 *   struct ScratchData;
787 *  
788 * @endcode
789 *
790 * Post-processing the solution to obtain @f$u^*@f$ is an element-by-element
791 * procedure; as such, we do not need to assemble any global data and do
792 * not declare any 'task data' for WorkStream to use.
793 *
794 * @code
795 *   struct PostProcessScratchData;
796 *  
797 * @endcode
798 *
799 * The following three functions are used by WorkStream to do the actual
800 * work of the program.
801 *
802 * @code
803 *   void assemble_system_one_cell(
804 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
805 *   ScratchData &scratch,
806 *   PerTaskData &task_data);
807 *  
808 *   void copy_local_to_global(const PerTaskData &data);
809 *  
810 *   void postprocess_one_cell(
811 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
812 *   PostProcessScratchData &scratch,
813 *   unsigned int &empty_data);
814 *  
815 *  
816 *   Triangulation<dim> triangulation;
817 *  
818 * @endcode
819 *
820 * The 'local' solutions are interior to each element. These
821 * represent the primal solution field @f$u@f$ as well as the auxiliary
822 * field @f$\mathbf{q}@f$.
823 *
824 * @code
825 *   const FESystem<dim> fe_local;
826 *   DoFHandler<dim> dof_handler_local;
827 *   Vector<double> solution_local;
828 *  
829 * @endcode
830 *
831 * The new finite element type and corresponding <code>DoFHandler</code> are
832 * used for the global skeleton solution that couples the element-level
833 * local solutions.
834 *
835 * @code
836 *   const FE_FaceQ<dim> fe;
837 *   DoFHandler<dim> dof_handler;
838 *   Vector<double> solution;
839 *   Vector<double> system_rhs;
840 *  
841 * @endcode
842 *
843 * As stated in the introduction, HDG solutions can be post-processed to
844 * attain superconvergence rates of @f$\mathcal{O}(h^{p+2})@f$. The
845 * post-processed solution is a discontinuous finite element solution
846 * representing the primal variable on the interior of each cell. We define
847 * a FE type of degree @f$p+1@f$ to represent this post-processed solution,
848 * which we only use for output after constructing it.
849 *
850 * @code
851 *   const FE_DGQ<dim> fe_u_post;
852 *   DoFHandler<dim> dof_handler_u_post;
853 *   Vector<double> solution_u_post;
854 *  
855 * @endcode
856 *
857 * The degrees of freedom corresponding to the skeleton strongly enforce
858 * Dirichlet boundary conditions, just as in a continuous Galerkin finite
859 * element method. We can enforce the boundary conditions in an analogous
860 * manner via an AffineConstraints object. In addition, hanging nodes are
861 * handled in the same way as for continuous finite elements: For the face
862 * elements which only define degrees of freedom on the face, this process
863 * sets the solution on the refined side to coincide with the
864 * representation on the coarse side.
865 *
866
867 *
868 * Note that for HDG, the elimination of hanging nodes is not the only
869 * possibility &mdash; in terms of the HDG theory, one could also use the
870 * unknowns from the refined side and express the local solution on the
871 * coarse side through the trace values on the refined side. However, such
872 * a setup is not as easily implemented in terms of deal.II loops and not
873 * further analyzed.
874 *
875 * @code
876 *   AffineConstraints<double> constraints;
877 *  
878 * @endcode
879 *
880 * The usage of the ChunkSparseMatrix class is similar to the usual sparse
881 * matrices: You need a sparsity pattern of type ChunkSparsityPattern and
882 * the actual matrix object. When creating the sparsity pattern, we just
883 * have to additionally pass the size of local blocks.
884 *
885 * @code
886 *   ChunkSparsityPattern sparsity_pattern;
887 *   ChunkSparseMatrix<double> system_matrix;
888 *  
889 * @endcode
890 *
891 * Same as @ref step_7 "step-7":
892 *
893 * @code
894 *   const RefinementMode refinement_mode;
895 *   ConvergenceTable convergence_table;
896 *   };
897 *  
898 * @endcode
899 *
900 *
901 * <a name="step_51-TheHDGclassimplementation"></a>
902 * <h3>The HDG class implementation</h3>
903 *
904
905 *
906 *
907 * <a name="step_51-Constructor"></a>
908 * <h4>Constructor</h4>
909 * The constructor is similar to those in other examples, with the exception
910 * of handling multiple DoFHandler and FiniteElement objects. Note that we
911 * create a system of finite elements for the local DG part, including the
912 * gradient/flux part and the scalar part.
913 *
914 * @code
915 *   template <int dim>
916 *   HDG<dim>::HDG(const unsigned int degree, const RefinementMode refinement_mode)
917 *   : fe_local(FE_DGQ<dim>(degree) ^ dim, FE_DGQ<dim>(degree))
918 *   , dof_handler_local(triangulation)
919 *   , fe(degree)
920 *   , dof_handler(triangulation)
921 *   , fe_u_post(degree + 1)
922 *   , dof_handler_u_post(triangulation)
923 *   , refinement_mode(refinement_mode)
924 *   {}
925 *  
926 *  
927 *  
928 * @endcode
929 *
930 *
931 * <a name="step_51-HDGsetup_system"></a>
932 * <h4>HDG::setup_system</h4>
933 * The system for an HDG solution is setup in an analogous manner to most
934 * of the other tutorial programs. We are careful to distribute dofs with
935 * all of our DoFHandler objects. The @p solution and @p system_matrix
936 * objects go with the global skeleton solution.
937 *
938 * @code
939 *   template <int dim>
940 *   void HDG<dim>::setup_system()
941 *   {
942 *   dof_handler_local.distribute_dofs(fe_local);
943 *   dof_handler.distribute_dofs(fe);
944 *   dof_handler_u_post.distribute_dofs(fe_u_post);
945 *  
946 *   std::cout << " Number of degrees of freedom: " << dof_handler.n_dofs()
947 *   << std::endl;
948 *  
949 *   solution.reinit(dof_handler.n_dofs());
950 *   system_rhs.reinit(dof_handler.n_dofs());
951 *  
952 *   solution_local.reinit(dof_handler_local.n_dofs());
953 *   solution_u_post.reinit(dof_handler_u_post.n_dofs());
954 *  
955 *   constraints.clear();
956 *   DoFTools::make_hanging_node_constraints(dof_handler, constraints);
957 *   std::map<types::boundary_id, const Function<dim> *> boundary_functions;
958 *   Solution<dim> solution_function;
959 *   boundary_functions[0] = &solution_function;
960 *   VectorTools::project_boundary_values(dof_handler,
961 *   boundary_functions,
962 *   QGauss<dim - 1>(fe.degree + 1),
963 *   constraints);
964 *   constraints.close();
965 *  
966 * @endcode
967 *
968 * When creating the chunk sparsity pattern, we first create the usual
969 * dynamic sparsity pattern and then set the chunk size, which is equal
970 * to the number of dofs on a face, when copying this into the final
971 * sparsity pattern.
972 *
973 * @code
974 *   {
975 *   DynamicSparsityPattern dsp(dof_handler.n_dofs());
976 *   DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false);
977 *   sparsity_pattern.copy_from(dsp, fe.n_dofs_per_face());
978 *   }
979 *   system_matrix.reinit(sparsity_pattern);
980 *   }
981 *  
982 *  
983 *  
984 * @endcode
985 *
986 *
987 * <a name="step_51-HDGPerTaskData"></a>
988 * <h4>HDG::PerTaskData</h4>
989 * Next comes the definition of the local data structures for the parallel
990 * assembly. The first structure @p PerTaskData contains the local vector
991 * and matrix that are written into the global matrix, whereas the
992 * ScratchData contains all data that we need for the local assembly. There
993 * is one variable worth noting here, namely the boolean variable @p
994 * trace_reconstruct. As mentioned in the introduction, we solve the HDG
995 * system in two steps. First, we create a linear system for the skeleton
996 * system where we condense the local part into it via the Schur complement
997 * @f$D-CA^{-1}B@f$. Then, we solve for the local part using the skeleton
998 * solution. For these two steps, we need the same matrices on the elements
999 * twice, which we want to compute by two assembly steps. Since most of the
1000 * code is similar, we do this with the same function but only switch
1001 * between the two based on a flag that we set when starting the
1002 * assembly. Since we need to pass this information on to the local worker
1003 * routines, we store it once in the task data.
1004 *
1005 * @code
1006 *   template <int dim>
1007 *   struct HDG<dim>::PerTaskData
1008 *   {
1009 *   FullMatrix<double> cell_matrix;
1010 *   Vector<double> cell_vector;
1011 *   std::vector<types::global_dof_index> dof_indices;
1012 *  
1013 *   bool trace_reconstruct;
1014 *  
1015 *   PerTaskData(const unsigned int n_dofs, const bool trace_reconstruct)
1016 *   : cell_matrix(n_dofs, n_dofs)
1017 *   , cell_vector(n_dofs)
1018 *   , dof_indices(n_dofs)
1019 *   , trace_reconstruct(trace_reconstruct)
1020 *   {}
1021 *   };
1022 *  
1023 *  
1024 *  
1025 * @endcode
1026 *
1027 *
1028 * <a name="step_51-HDGScratchData"></a>
1029 * <h4>HDG::ScratchData</h4>
1030 * @p ScratchData contains persistent data for each
1031 * thread within WorkStream. The FEValues, matrix,
1032 * and vector objects should be familiar by now. There are two objects that
1033 * need to be discussed: `std::vector<std::vector<unsigned int> >
1034 * fe_local_support_on_face` and `std::vector<std::vector<unsigned int> >
1035 * fe_support_on_face`. These are used to indicate whether or not the finite
1036 * elements chosen have support (non-zero values) on a given face of the
1037 * reference cell for the local part associated to @p fe_local and the
1038 * skeleton part @p fe. We extract this information in the
1039 * constructor and store it once for all cells that we work on. Had we not
1040 * stored this information, we would be forced to assemble a large number of
1041 * zero terms on each cell, which would significantly slow the program.
1042 *
1043 * @code
1044 *   template <int dim>
1045 *   struct HDG<dim>::ScratchData
1046 *   {
1047 *   FEValues<dim> fe_values_local;
1048 *   FEFaceValues<dim> fe_face_values_local;
1049 *   FEFaceValues<dim> fe_face_values;
1050 *  
1051 *   FullMatrix<double> ll_matrix;
1052 *   FullMatrix<double> lf_matrix;
1053 *   FullMatrix<double> fl_matrix;
1054 *   FullMatrix<double> tmp_matrix;
1055 *   Vector<double> l_rhs;
1056 *   Vector<double> tmp_rhs;
1057 *  
1058 *   std::vector<Tensor<1, dim>> q_phi;
1059 *   std::vector<double> q_phi_div;
1060 *   std::vector<double> u_phi;
1061 *   std::vector<Tensor<1, dim>> u_phi_grad;
1062 *   std::vector<double> tr_phi;
1063 *   std::vector<double> trace_values;
1064 *  
1065 *   std::vector<std::vector<unsigned int>> fe_local_support_on_face;
1066 *   std::vector<std::vector<unsigned int>> fe_support_on_face;
1067 *  
1068 *   ConvectionVelocity<dim> convection_velocity;
1069 *   RightHandSide<dim> right_hand_side;
1070 *   const Solution<dim> exact_solution;
1071 *  
1072 *   ScratchData(const FiniteElement<dim> &fe,
1073 *   const FiniteElement<dim> &fe_local,
1074 *   const QGauss<dim> &quadrature_formula,
1075 *   const QGauss<dim - 1> &face_quadrature_formula,
1076 *   const UpdateFlags local_flags,
1077 *   const UpdateFlags local_face_flags,
1078 *   const UpdateFlags flags)
1079 *   : fe_values_local(fe_local, quadrature_formula, local_flags)
1080 *   , fe_face_values_local(fe_local,
1081 *   face_quadrature_formula,
1082 *   local_face_flags)
1083 *   , fe_face_values(fe, face_quadrature_formula, flags)
1084 *   , ll_matrix(fe_local.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
1085 *   , lf_matrix(fe_local.n_dofs_per_cell(), fe.n_dofs_per_cell())
1086 *   , fl_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
1087 *   , tmp_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
1088 *   , l_rhs(fe_local.n_dofs_per_cell())
1089 *   , tmp_rhs(fe_local.n_dofs_per_cell())
1090 *   , q_phi(fe_local.n_dofs_per_cell())
1091 *   , q_phi_div(fe_local.n_dofs_per_cell())
1092 *   , u_phi(fe_local.n_dofs_per_cell())
1093 *   , u_phi_grad(fe_local.n_dofs_per_cell())
1094 *   , tr_phi(fe.n_dofs_per_cell())
1095 *   , trace_values(face_quadrature_formula.size())
1096 *   , fe_local_support_on_face(GeometryInfo<dim>::faces_per_cell)
1097 *   , fe_support_on_face(GeometryInfo<dim>::faces_per_cell)
1098 *   , exact_solution()
1099 *   {
1100 *   for (const unsigned int face_no : GeometryInfo<dim>::face_indices())
1101 *   for (unsigned int i = 0; i < fe_local.n_dofs_per_cell(); ++i)
1102 *   {
1103 *   if (fe_local.has_support_on_face(i, face_no))
1104 *   fe_local_support_on_face[face_no].push_back(i);
1105 *   }
1106 *  
1107 *   for (const unsigned int face_no : GeometryInfo<dim>::face_indices())
1108 *   for (unsigned int i = 0; i < fe.n_dofs_per_cell(); ++i)
1109 *   {
1110 *   if (fe.has_support_on_face(i, face_no))
1111 *   fe_support_on_face[face_no].push_back(i);
1112 *   }
1113 *   }
1114 *  
1115 *   ScratchData(const ScratchData &sd)
1116 *   : fe_values_local(sd.fe_values_local.get_fe(),
1117 *   sd.fe_values_local.get_quadrature(),
1118 *   sd.fe_values_local.get_update_flags())
1119 *   , fe_face_values_local(sd.fe_face_values_local.get_fe(),
1120 *   sd.fe_face_values_local.get_quadrature(),
1121 *   sd.fe_face_values_local.get_update_flags())
1122 *   , fe_face_values(sd.fe_face_values.get_fe(),
1123 *   sd.fe_face_values.get_quadrature(),
1124 *   sd.fe_face_values.get_update_flags())
1125 *   , ll_matrix(sd.ll_matrix)
1126 *   , lf_matrix(sd.lf_matrix)
1127 *   , fl_matrix(sd.fl_matrix)
1128 *   , tmp_matrix(sd.tmp_matrix)
1129 *   , l_rhs(sd.l_rhs)
1130 *   , tmp_rhs(sd.tmp_rhs)
1131 *   , q_phi(sd.q_phi)
1132 *   , q_phi_div(sd.q_phi_div)
1133 *   , u_phi(sd.u_phi)
1134 *   , u_phi_grad(sd.u_phi_grad)
1135 *   , tr_phi(sd.tr_phi)
1136 *   , trace_values(sd.trace_values)
1137 *   , fe_local_support_on_face(sd.fe_local_support_on_face)
1138 *   , fe_support_on_face(sd.fe_support_on_face)
1139 *   , exact_solution()
1140 *   {}
1141 *   };
1142 *  
1143 *  
1144 *  
1145 * @endcode
1146 *
1147 *
1148 * <a name="step_51-HDGPostProcessScratchData"></a>
1149 * <h4>HDG::PostProcessScratchData</h4>
1150 * @p PostProcessScratchData contains the data used by WorkStream
1151 * when post-processing the local solution @f$u^*@f$. It is similar, but much
1152 * simpler, than @p ScratchData.
1153 *
1154 * @code
1155 *   template <int dim>
1156 *   struct HDG<dim>::PostProcessScratchData
1157 *   {
1158 *   FEValues<dim> fe_values_local;
1159 *   FEValues<dim> fe_values;
1160 *  
1161 *   std::vector<double> u_values;
1162 *   std::vector<Tensor<1, dim>> u_gradients;
1163 *   FullMatrix<double> cell_matrix;
1164 *  
1165 *   Vector<double> cell_rhs;
1166 *   Vector<double> cell_sol;
1167 *  
1168 *   PostProcessScratchData(const FiniteElement<dim> &fe,
1169 *   const FiniteElement<dim> &fe_local,
1170 *   const QGauss<dim> &quadrature_formula,
1171 *   const UpdateFlags local_flags,
1172 *   const UpdateFlags flags)
1173 *   : fe_values_local(fe_local, quadrature_formula, local_flags)
1174 *   , fe_values(fe, quadrature_formula, flags)
1175 *   , u_values(quadrature_formula.size())
1176 *   , u_gradients(quadrature_formula.size())
1177 *   , cell_matrix(fe.n_dofs_per_cell(), fe.n_dofs_per_cell())
1178 *   , cell_rhs(fe.n_dofs_per_cell())
1179 *   , cell_sol(fe.n_dofs_per_cell())
1180 *   {}
1181 *  
1182 *   PostProcessScratchData(const PostProcessScratchData &sd)
1183 *   : fe_values_local(sd.fe_values_local.get_fe(),
1184 *   sd.fe_values_local.get_quadrature(),
1185 *   sd.fe_values_local.get_update_flags())
1186 *   , fe_values(sd.fe_values.get_fe(),
1187 *   sd.fe_values.get_quadrature(),
1188 *   sd.fe_values.get_update_flags())
1189 *   , u_values(sd.u_values)
1190 *   , u_gradients(sd.u_gradients)
1191 *   , cell_matrix(sd.cell_matrix)
1192 *   , cell_rhs(sd.cell_rhs)
1193 *   , cell_sol(sd.cell_sol)
1194 *   {}
1195 *   };
1196 *  
1197 *  
1198 *  
1199 * @endcode
1200 *
1201 *
1202 * <a name="step_51-HDGassemble_system"></a>
1203 * <h4>HDG::assemble_system</h4>
1204 * The @p assemble_system function is similar to the one in @ref step_32 "step-32", where
1205 * the quadrature formula and the update flags are set up, and then
1206 * <code>WorkStream</code> is used to do the work in a multi-threaded
1207 * manner. The @p trace_reconstruct input parameter is used to decide
1208 * whether we are solving for the global skeleton solution (false) or the
1209 * local solution (true).
1210 *
1211
1212 *
1213 * One thing worth noting for the multi-threaded execution of assembly is
1214 * the fact that the local computations in `assemble_system_one_cell()` call
1215 * into BLAS and LAPACK functions if those are available in deal.II. Thus,
1216 * the underlying BLAS/LAPACK library must support calls from multiple
1217 * threads at the same time. Most implementations do support this, but some
1218 * libraries need to be built in a specific way to avoid problems. For
1219 * example, OpenBLAS compiled without multithreading inside the BLAS/LAPACK
1220 * calls needs to be built with a flag called `USE_LOCKING` set to true.
1221 *
1222 * @code
1223 *   template <int dim>
1224 *   void HDG<dim>::assemble_system(const bool trace_reconstruct)
1225 *   {
1226 *   const QGauss<dim> quadrature_formula(fe.degree + 1);
1227 *   const QGauss<dim - 1> face_quadrature_formula(fe.degree + 1);
1228 *  
1229 *   const UpdateFlags local_flags(update_values | update_gradients |
1230 *   update_JxW_values | update_quadrature_points);
1231 *  
1232 *   const UpdateFlags local_face_flags(update_values);
1233 *  
1234 *   const UpdateFlags flags(update_values | update_normal_vectors |
1235 *   update_quadrature_points | update_JxW_values);
1236 *  
1237 *   PerTaskData task_data(fe.n_dofs_per_cell(), trace_reconstruct);
1238 *   ScratchData scratch(fe,
1239 *   fe_local,
1240 *   quadrature_formula,
1241 *   face_quadrature_formula,
1242 *   local_flags,
1243 *   local_face_flags,
1244 *   flags);
1245 *  
1246 *   WorkStream::run(dof_handler.begin_active(),
1247 *   dof_handler.end(),
1248 *   *this,
1249 *   &HDG<dim>::assemble_system_one_cell,
1250 *   &HDG<dim>::copy_local_to_global,
1251 *   scratch,
1252 *   task_data);
1253 *   }
1254 *  
1255 *  
1256 *  
1257 * @endcode
1258 *
1259 *
1260 * <a name="step_51-HDGassemble_system_one_cell"></a>
1261 * <h4>HDG::assemble_system_one_cell</h4>
1262 * The real work of the HDG program is done by @p assemble_system_one_cell.
1263 * Assembling the local matrices @f$A, B, C@f$ is done here, along with the
1264 * local contributions of the global matrix @f$D@f$.
1265 *
1266 * @code
1267 *   template <int dim>
1268 *   void HDG<dim>::assemble_system_one_cell(
1269 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
1270 *   ScratchData &scratch,
1271 *   PerTaskData &task_data)
1272 *   {
1273 * @endcode
1274 *
1275 * Construct iterator for dof_handler_local for FEValues reinit function.
1276 *
1277 * @code
1278 *   const typename DoFHandler<dim>::active_cell_iterator loc_cell =
1279 *   cell->as_dof_handler_iterator(dof_handler_local);
1280 *  
1281 *   const unsigned int n_q_points =
1282 *   scratch.fe_values_local.get_quadrature().size();
1283 *   const unsigned int n_face_q_points =
1284 *   scratch.fe_face_values_local.get_quadrature().size();
1285 *  
1286 *   const unsigned int loc_dofs_per_cell =
1287 *   scratch.fe_values_local.get_fe().n_dofs_per_cell();
1288 *  
1289 *   const FEValuesExtractors::Vector fluxes(0);
1290 *   const FEValuesExtractors::Scalar scalar(dim);
1291 *  
1292 *   scratch.ll_matrix = 0;
1293 *   scratch.l_rhs = 0;
1294 *   if (!task_data.trace_reconstruct)
1295 *   {
1296 *   scratch.lf_matrix = 0;
1297 *   scratch.fl_matrix = 0;
1298 *   task_data.cell_matrix = 0;
1299 *   task_data.cell_vector = 0;
1300 *   }
1301 *   scratch.fe_values_local.reinit(loc_cell);
1302 *  
1303 * @endcode
1304 *
1305 * We first compute the cell-interior contribution to @p ll_matrix matrix
1306 * (referred to as matrix @f$A@f$ in the introduction) corresponding to
1307 * local-local coupling, as well as the local right-hand-side vector. We
1308 * store the values at each quadrature point for the basis functions, the
1309 * right-hand-side value, and the convection velocity, in order to have
1310 * quick access to these fields.
1311 *
1312 * @code
1313 *   for (unsigned int q = 0; q < n_q_points; ++q)
1314 *   {
1315 *   const double rhs_value = scratch.right_hand_side.value(
1316 *   scratch.fe_values_local.quadrature_point(q));
1317 *   const Tensor<1, dim> convection = scratch.convection_velocity.value(
1318 *   scratch.fe_values_local.quadrature_point(q));
1319 *   const double JxW = scratch.fe_values_local.JxW(q);
1320 *   for (unsigned int k = 0; k < loc_dofs_per_cell; ++k)
1321 *   {
1322 *   scratch.q_phi[k] = scratch.fe_values_local[fluxes].value(k, q);
1323 *   scratch.q_phi_div[k] =
1324 *   scratch.fe_values_local[fluxes].divergence(k, q);
1325 *   scratch.u_phi[k] = scratch.fe_values_local[scalar].value(k, q);
1326 *   scratch.u_phi_grad[k] =
1327 *   scratch.fe_values_local[scalar].gradient(k, q);
1328 *   }
1329 *   for (unsigned int i = 0; i < loc_dofs_per_cell; ++i)
1330 *   {
1331 *   for (unsigned int j = 0; j < loc_dofs_per_cell; ++j)
1332 *   scratch.ll_matrix(i, j) +=
1333 *   (scratch.q_phi[i] * scratch.q_phi[j] -
1334 *   scratch.q_phi_div[i] * scratch.u_phi[j] +
1335 *   scratch.u_phi[i] * scratch.q_phi_div[j] -
1336 *   (scratch.u_phi_grad[i] * convection) * scratch.u_phi[j]) *
1337 *   JxW;
1338 *   scratch.l_rhs(i) += scratch.u_phi[i] * rhs_value * JxW;
1339 *   }
1340 *   }
1341 *  
1342 * @endcode
1343 *
1344 * Face terms are assembled on all faces of all elements. This is in
1345 * contrast to more traditional DG methods, where each face is only visited
1346 * once in the assembly procedure.
1347 *
1348 * @code
1349 *   for (const auto face_no : cell->face_indices())
1350 *   {
1351 *   scratch.fe_face_values_local.reinit(loc_cell, face_no);
1352 *   scratch.fe_face_values.reinit(cell, face_no);
1353 *  
1354 * @endcode
1355 *
1356 * The already obtained @f$\hat{u}@f$ values are needed when solving for the
1357 * local variables.
1358 *
1359 * @code
1360 *   if (task_data.trace_reconstruct)
1361 *   scratch.fe_face_values.get_function_values(solution,
1362 *   scratch.trace_values);
1363 *  
1364 *   for (unsigned int q = 0; q < n_face_q_points; ++q)
1365 *   {
1366 *   const double JxW = scratch.fe_face_values.JxW(q);
1367 *   const Point<dim> quadrature_point =
1368 *   scratch.fe_face_values.quadrature_point(q);
1369 *   const Tensor<1, dim> normal =
1370 *   scratch.fe_face_values.normal_vector(q);
1371 *   const Tensor<1, dim> convection =
1372 *   scratch.convection_velocity.value(quadrature_point);
1373 *  
1374 * @endcode
1375 *
1376 * Here we compute the stabilization parameter discussed in the
1377 * introduction: since the diffusion is one and the diffusion
1378 * length scale is set to 1/5, it simply results in a contribution
1379 * of 5 for the diffusion part and the magnitude of convection
1380 * through the element boundary in a centered scheme for the
1381 * convection part.
1382 *
1383 * @code
1384 *   const double tau_stab = (5. + std::abs(convection * normal));
1385 *  
1386 * @endcode
1387 *
1388 * We store the non-zero flux and scalar values, making use of the
1389 * support_on_face information we created in @p ScratchData.
1390 *
1391 * @code
1392 *   for (unsigned int k = 0;
1393 *   k < scratch.fe_local_support_on_face[face_no].size();
1394 *   ++k)
1395 *   {
1396 *   const unsigned int kk =
1397 *   scratch.fe_local_support_on_face[face_no][k];
1398 *   scratch.q_phi[k] =
1399 *   scratch.fe_face_values_local[fluxes].value(kk, q);
1400 *   scratch.u_phi[k] =
1401 *   scratch.fe_face_values_local[scalar].value(kk, q);
1402 *   }
1403 *  
1404 * @endcode
1405 *
1406 * When @p trace_reconstruct=false, we are preparing to assemble the
1407 * system for the skeleton variable @f$\hat{u}@f$. If this is the case,
1408 * we must assemble all local matrices associated with the problem:
1409 * local-local, local-face, face-local, and face-face. The
1410 * face-face matrix is stored as @p TaskData::cell_matrix, so that
1411 * it can be assembled into the global system by @p
1412 * copy_local_to_global.
1413 *
1414 * @code
1415 *   if (!task_data.trace_reconstruct)
1416 *   {
1417 *   for (unsigned int k = 0;
1418 *   k < scratch.fe_support_on_face[face_no].size();
1419 *   ++k)
1420 *   scratch.tr_phi[k] = scratch.fe_face_values.shape_value(
1421 *   scratch.fe_support_on_face[face_no][k], q);
1422 *   for (unsigned int i = 0;
1423 *   i < scratch.fe_local_support_on_face[face_no].size();
1424 *   ++i)
1425 *   for (unsigned int j = 0;
1426 *   j < scratch.fe_support_on_face[face_no].size();
1427 *   ++j)
1428 *   {
1429 *   const unsigned int ii =
1430 *   scratch.fe_local_support_on_face[face_no][i];
1431 *   const unsigned int jj =
1432 *   scratch.fe_support_on_face[face_no][j];
1433 *   scratch.lf_matrix(ii, jj) +=
1434 *   ((scratch.q_phi[i] * normal +
1435 *   (convection * normal - tau_stab) * scratch.u_phi[i]) *
1436 *   scratch.tr_phi[j]) *
1437 *   JxW;
1438 *  
1439 * @endcode
1440 *
1441 * Note the sign of the face_no-local matrix. We negate
1442 * the sign during assembly here so that we can use the
1443 * FullMatrix::mmult with addition when computing the
1444 * Schur complement.
1445 *
1446 * @code
1447 *   scratch.fl_matrix(jj, ii) -=
1448 *   ((scratch.q_phi[i] * normal +
1449 *   tau_stab * scratch.u_phi[i]) *
1450 *   scratch.tr_phi[j]) *
1451 *   JxW;
1452 *   }
1453 *  
1454 *   for (unsigned int i = 0;
1455 *   i < scratch.fe_support_on_face[face_no].size();
1456 *   ++i)
1457 *   for (unsigned int j = 0;
1458 *   j < scratch.fe_support_on_face[face_no].size();
1459 *   ++j)
1460 *   {
1461 *   const unsigned int ii =
1462 *   scratch.fe_support_on_face[face_no][i];
1463 *   const unsigned int jj =
1464 *   scratch.fe_support_on_face[face_no][j];
1465 *   task_data.cell_matrix(ii, jj) +=
1466 *   ((convection * normal - tau_stab) * scratch.tr_phi[i] *
1467 *   scratch.tr_phi[j]) *
1468 *   JxW;
1469 *   }
1470 *  
1471 *   if (cell->face(face_no)->at_boundary() &&
1472 *   (cell->face(face_no)->boundary_id() == 1))
1473 *   {
1474 *   const double neumann_value =
1475 *   -scratch.exact_solution.gradient(quadrature_point) *
1476 *   normal +
1477 *   convection * normal *
1478 *   scratch.exact_solution.value(quadrature_point);
1479 *   for (unsigned int i = 0;
1480 *   i < scratch.fe_support_on_face[face_no].size();
1481 *   ++i)
1482 *   {
1483 *   const unsigned int ii =
1484 *   scratch.fe_support_on_face[face_no][i];
1485 *   task_data.cell_vector(ii) +=
1486 *   scratch.tr_phi[i] * neumann_value * JxW;
1487 *   }
1488 *   }
1489 *   }
1490 *  
1491 * @endcode
1492 *
1493 * This last term adds the contribution of the term @f$\left<w,\tau
1494 * u_h\right>_{\partial \mathcal T}@f$ to the local matrix. As opposed
1495 * to the face matrices above, we need it in both assembly stages.
1496 *
1497 * @code
1498 *   for (unsigned int i = 0;
1499 *   i < scratch.fe_local_support_on_face[face_no].size();
1500 *   ++i)
1501 *   for (unsigned int j = 0;
1502 *   j < scratch.fe_local_support_on_face[face_no].size();
1503 *   ++j)
1504 *   {
1505 *   const unsigned int ii =
1506 *   scratch.fe_local_support_on_face[face_no][i];
1507 *   const unsigned int jj =
1508 *   scratch.fe_local_support_on_face[face_no][j];
1509 *   scratch.ll_matrix(ii, jj) +=
1510 *   tau_stab * scratch.u_phi[i] * scratch.u_phi[j] * JxW;
1511 *   }
1512 *  
1513 * @endcode
1514 *
1515 * When @p trace_reconstruct=true, we are solving for the local
1516 * solutions on an element by element basis. The local
1517 * right-hand-side is calculated by replacing the basis functions @p
1518 * tr_phi in the @p lf_matrix computation by the computed values @p
1519 * trace_values. Of course, the sign of the matrix is now minus
1520 * since we have moved everything to the other side of the equation.
1521 *
1522 * @code
1523 *   if (task_data.trace_reconstruct)
1524 *   for (unsigned int i = 0;
1525 *   i < scratch.fe_local_support_on_face[face_no].size();
1526 *   ++i)
1527 *   {
1528 *   const unsigned int ii =
1529 *   scratch.fe_local_support_on_face[face_no][i];
1530 *   scratch.l_rhs(ii) -=
1531 *   (scratch.q_phi[i] * normal +
1532 *   scratch.u_phi[i] * (convection * normal - tau_stab)) *
1533 *   scratch.trace_values[q] * JxW;
1534 *   }
1535 *   }
1536 *   }
1537 *  
1538 * @endcode
1539 *
1540 * Once assembly of all of the local contributions is complete, we must
1541 * either: (1) assemble the global system, or (2) compute the local solution
1542 * values and save them. In either case, the first step is to invert the
1543 * local-local matrix.
1544 *
1545 * @code
1546 *   scratch.ll_matrix.gauss_jordan();
1547 *  
1548 * @endcode
1549 *
1550 * For (1), we compute the Schur complement and add it to the @p
1551 * cell_matrix, matrix @f$D@f$ in the introduction.
1552 *
1553 * @code
1554 *   if (task_data.trace_reconstruct == false)
1555 *   {
1556 *   scratch.fl_matrix.mmult(scratch.tmp_matrix, scratch.ll_matrix);
1557 *   scratch.tmp_matrix.vmult_add(task_data.cell_vector, scratch.l_rhs);
1558 *   scratch.tmp_matrix.mmult(task_data.cell_matrix,
1559 *   scratch.lf_matrix,
1560 *   true);
1561 *   cell->get_dof_indices(task_data.dof_indices);
1562 *   }
1563 * @endcode
1564 *
1565 * For (2), we are simply solving (ll_matrix).(solution_local) = (l_rhs).
1566 * Hence, we multiply @p l_rhs by our already inverted local-local matrix
1567 * and store the result using the <code>set_dof_values</code> function.
1568 *
1569 * @code
1570 *   else
1571 *   {
1572 *   scratch.ll_matrix.vmult(scratch.tmp_rhs, scratch.l_rhs);
1573 *   loc_cell->set_dof_values(scratch.tmp_rhs, solution_local);
1574 *   }
1575 *   }
1576 *  
1577 *  
1578 *  
1579 * @endcode
1580 *
1581 *
1582 * <a name="step_51-HDGcopy_local_to_global"></a>
1583 * <h4>HDG::copy_local_to_global</h4>
1584 * If we are in the first step of the solution, i.e. @p trace_reconstruct=false,
1585 * then we assemble the local matrices into the global system.
1586 *
1587 * @code
1588 *   template <int dim>
1589 *   void HDG<dim>::copy_local_to_global(const PerTaskData &data)
1590 *   {
1591 *   if (data.trace_reconstruct == false)
1592 *   constraints.distribute_local_to_global(data.cell_matrix,
1593 *   data.cell_vector,
1594 *   data.dof_indices,
1595 *   system_matrix,
1596 *   system_rhs);
1597 *   }
1598 *  
1599 *  
1600 *  
1601 * @endcode
1602 *
1603 *
1604 * <a name="step_51-HDGsolve"></a>
1605 * <h4>HDG::solve</h4>
1606 * The skeleton solution is solved for by using a BiCGStab solver with
1607 * identity preconditioner.
1608 *
1609 * @code
1610 *   template <int dim>
1611 *   void HDG<dim>::solve()
1612 *   {
1613 *   SolverControl solver_control(system_matrix.m() * 10,
1614 *   1e-11 * system_rhs.l2_norm());
1615 *   SolverBicgstab<Vector<double>> solver(solver_control);
1616 *   solver.solve(system_matrix, solution, system_rhs, PreconditionIdentity());
1617 *  
1618 *   std::cout << " Number of BiCGStab iterations: "
1619 *   << solver_control.last_step() << std::endl;
1620 *  
1621 *   system_matrix.clear();
1622 *   sparsity_pattern.reinit(0, 0, 0, 1);
1623 *  
1624 *   constraints.distribute(solution);
1625 *  
1626 * @endcode
1627 *
1628 * Once we have solved for the skeleton solution,
1629 * we can solve for the local solutions in an element-by-element
1630 * fashion. We do this by re-using the same @p assemble_system function
1631 * but switching @p trace_reconstruct to true.
1632 *
1633 * @code
1634 *   assemble_system(true);
1635 *   }
1636 *  
1637 *  
1638 *  
1639 * @endcode
1640 *
1641 *
1642 * <a name="step_51-HDGpostprocess"></a>
1643 * <h4>HDG::postprocess</h4>
1644 *
1645
1646 *
1647 * The postprocess method serves two purposes. First, we want to construct a
1648 * post-processed scalar variable in the element space of degree @f$p+1@f$ that
1649 * we hope will converge at order @f$p+2@f$. This is again an element-by-element
1650 * process and only involves the scalar solution as well as the gradient on
1651 * the local cell. To do this, we introduce the already defined scratch data
1652 * together with some update flags and run the work stream to do this in
1653 * parallel.
1654 *
1655
1656 *
1657 * Secondly, we want to compute discretization errors just as we did in
1658 * @ref step_7 "step-7". The overall procedure is similar with calls to
1659 * VectorTools::integrate_difference. The difference is in how we compute
1660 * the errors for the scalar variable and the gradient variable. In @ref step_7 "step-7",
1661 * we did this by computing @p L2_norm or @p H1_seminorm
1662 * contributions. Here, we have a DoFHandler with these two contributions
1663 * computed and sorted by their vector component, <code>[0, dim)</code> for
1664 * the
1665 * gradient and @p dim for the scalar. To compute their value, we hence use
1666 * a ComponentSelectFunction with either of them, together with the @p
1667 * SolutionAndGradient class introduced above that contains the analytic
1668 * parts of either of them. Eventually, we also compute the L2-error of the
1669 * post-processed solution and add the results into the convergence table.
1670 *
1671 * @code
1672 *   template <int dim>
1673 *   void HDG<dim>::postprocess()
1674 *   {
1675 *   {
1676 *   const QGauss<dim> quadrature_formula(fe_u_post.degree + 1);
1677 *   const UpdateFlags local_flags(update_values);
1678 *   const UpdateFlags flags(update_values | update_gradients |
1679 *   update_JxW_values);
1680 *  
1681 *   PostProcessScratchData scratch(
1682 *   fe_u_post, fe_local, quadrature_formula, local_flags, flags);
1683 *  
1684 *   WorkStream::run(
1685 *   dof_handler_u_post.begin_active(),
1686 *   dof_handler_u_post.end(),
1687 *   [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
1688 *   PostProcessScratchData &scratch,
1689 *   unsigned int &data) {
1690 *   this->postprocess_one_cell(cell, scratch, data);
1691 *   },
1692 *   std::function<void(const unsigned int &)>(),
1693 *   scratch,
1694 *   0U);
1695 *   }
1696 *  
1697 *   Vector<float> difference_per_cell(triangulation.n_active_cells());
1698 *  
1699 *   ComponentSelectFunction<dim> value_select(dim, dim + 1);
1700 *   VectorTools::integrate_difference(dof_handler_local,
1701 *   solution_local,
1702 *   SolutionAndGradient<dim>(),
1703 *   difference_per_cell,
1704 *   QGauss<dim>(fe.degree + 2),
1705 *   VectorTools::L2_norm,
1706 *   &value_select);
1707 *   const double L2_error =
1708 *   VectorTools::compute_global_error(triangulation,
1709 *   difference_per_cell,
1710 *   VectorTools::L2_norm);
1711 *  
1712 *   ComponentSelectFunction<dim> gradient_select(
1713 *   std::pair<unsigned int, unsigned int>(0, dim), dim + 1);
1714 *   VectorTools::integrate_difference(dof_handler_local,
1715 *   solution_local,
1716 *   SolutionAndGradient<dim>(),
1717 *   difference_per_cell,
1718 *   QGauss<dim>(fe.degree + 2),
1719 *   VectorTools::L2_norm,
1720 *   &gradient_select);
1721 *   const double grad_error =
1722 *   VectorTools::compute_global_error(triangulation,
1723 *   difference_per_cell,
1724 *   VectorTools::L2_norm);
1725 *  
1726 *   VectorTools::integrate_difference(dof_handler_u_post,
1727 *   solution_u_post,
1728 *   Solution<dim>(),
1729 *   difference_per_cell,
1730 *   QGauss<dim>(fe.degree + 3),
1731 *   VectorTools::L2_norm);
1732 *   const double post_error =
1733 *   VectorTools::compute_global_error(triangulation,
1734 *   difference_per_cell,
1735 *   VectorTools::L2_norm);
1736 *  
1737 *   convergence_table.add_value("cells", triangulation.n_active_cells());
1738 *   convergence_table.add_value("dofs", dof_handler.n_dofs());
1739 *  
1740 *   convergence_table.add_value("val L2", L2_error);
1741 *   convergence_table.set_scientific("val L2", true);
1742 *   convergence_table.set_precision("val L2", 3);
1743 *  
1744 *   convergence_table.add_value("grad L2", grad_error);
1745 *   convergence_table.set_scientific("grad L2", true);
1746 *   convergence_table.set_precision("grad L2", 3);
1747 *  
1748 *   convergence_table.add_value("val L2-post", post_error);
1749 *   convergence_table.set_scientific("val L2-post", true);
1750 *   convergence_table.set_precision("val L2-post", 3);
1751 *   }
1752 *  
1753 *  
1754 *  
1755 * @endcode
1756 *
1757 *
1758 * <a name="step_51-HDGpostprocess_one_cell"></a>
1759 * <h4>HDG::postprocess_one_cell</h4>
1760 *
1761
1762 *
1763 * This is the actual work done for the postprocessing. According to the
1764 * discussion in the introduction, we need to set up a system that projects
1765 * the gradient part of the DG solution onto the gradient of the
1766 * post-processed variable. Moreover, we need to set the average of the new
1767 * post-processed variable to equal the average of the scalar DG solution
1768 * on the cell.
1769 *
1770
1771 *
1772 * More technically speaking, the projection of the gradient is a system
1773 * that would potentially fill our @p dofs_per_cell times @p dofs_per_cell
1774 * matrix but is singular (the sum of all rows would be zero because the
1775 * constant function has zero gradient). Therefore, we take one row away and
1776 * use it for imposing the average of the scalar value. We pick the first
1777 * row for the scalar part, even though we could pick any row for @f$\mathcal
1778 * Q_{-p}@f$ elements. However, had we used FE_DGP elements instead, the first
1779 * row would correspond to the constant part already and deleting e.g. the
1780 * last row would give us a singular system. This way, our program can also
1781 * be used for those elements.
1782 *
1783 * @code
1784 *   template <int dim>
1785 *   void HDG<dim>::postprocess_one_cell(
1786 *   const typename DoFHandler<dim>::active_cell_iterator &cell,
1787 *   PostProcessScratchData &scratch,
1788 *   unsigned int &)
1789 *   {
1790 *   const typename DoFHandler<dim>::active_cell_iterator loc_cell =
1791 *   cell->as_dof_handler_iterator(dof_handler_local);
1792 *  
1793 *   scratch.fe_values_local.reinit(loc_cell);
1794 *   scratch.fe_values.reinit(cell);
1795 *  
1796 *   const FEValuesExtractors::Vector fluxes(0);
1797 *   const FEValuesExtractors::Scalar scalar(dim);
1798 *  
1799 *   const unsigned int n_q_points = scratch.fe_values.get_quadrature().size();
1800 *   const unsigned int dofs_per_cell = scratch.fe_values.dofs_per_cell;
1801 *  
1802 *   scratch.fe_values_local[scalar].get_function_values(solution_local,
1803 *   scratch.u_values);
1804 *   scratch.fe_values_local[fluxes].get_function_values(solution_local,
1805 *   scratch.u_gradients);
1806 *  
1807 *   double sum = 0;
1808 *   for (unsigned int i = 1; i < dofs_per_cell; ++i)
1809 *   {
1810 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
1811 *   {
1812 *   sum = 0;
1813 *   for (unsigned int q = 0; q < n_q_points; ++q)
1814 *   sum += (scratch.fe_values.shape_grad(i, q) *
1815 *   scratch.fe_values.shape_grad(j, q)) *
1816 *   scratch.fe_values.JxW(q);
1817 *   scratch.cell_matrix(i, j) = sum;
1818 *   }
1819 *  
1820 *   sum = 0;
1821 *   for (unsigned int q = 0; q < n_q_points; ++q)
1822 *   sum -= (scratch.fe_values.shape_grad(i, q) * scratch.u_gradients[q]) *
1823 *   scratch.fe_values.JxW(q);
1824 *   scratch.cell_rhs(i) = sum;
1825 *   }
1826 *   for (unsigned int j = 0; j < dofs_per_cell; ++j)
1827 *   {
1828 *   sum = 0;
1829 *   for (unsigned int q = 0; q < n_q_points; ++q)
1830 *   sum += scratch.fe_values.shape_value(j, q) * scratch.fe_values.JxW(q);
1831 *   scratch.cell_matrix(0, j) = sum;
1832 *   }
1833 *   {
1834 *   sum = 0;
1835 *   for (unsigned int q = 0; q < n_q_points; ++q)
1836 *   sum += scratch.u_values[q] * scratch.fe_values.JxW(q);
1837 *   scratch.cell_rhs(0) = sum;
1838 *   }
1839 *  
1840 * @endcode
1841 *
1842 * Having assembled all terms, we can again go on and solve the linear
1843 * system. We invert the matrix and then multiply the inverse by the
1844 * right hand side. An alternative (and more numerically stable) method
1845 * would have been to only factorize the matrix and apply the factorization.
1846 *
1847 * @code
1848 *   scratch.cell_matrix.gauss_jordan();
1849 *   scratch.cell_matrix.vmult(scratch.cell_sol, scratch.cell_rhs);
1850 *   cell->distribute_local_to_global(scratch.cell_sol, solution_u_post);
1851 *   }
1852 *  
1853 *  
1854 *  
1855 * @endcode
1856 *
1857 *
1858 * <a name="step_51-HDGoutput_results"></a>
1859 * <h4>HDG::output_results</h4>
1860 * We have 3 sets of results that we would like to output: the local
1861 * solution, the post-processed local solution, and the skeleton solution. The
1862 * former 2 both 'live' on element volumes, whereas the latter lives on
1863 * codimension-1 surfaces
1864 * of the triangulation. Our @p output_results function writes all local solutions
1865 * to the same vtk file, even though they correspond to different
1866 * DoFHandler objects. The graphical output for the skeleton
1867 * variable is done through use of the DataOutFaces class.
1868 *
1869 * @code
1870 *   template <int dim>
1871 *   void HDG<dim>::output_results(const unsigned int cycle)
1872 *   {
1873 *   std::string filename;
1874 *   switch (refinement_mode)
1875 *   {
1876 *   case global_refinement:
1877 *   filename = "solution-global";
1878 *   break;
1879 *   case adaptive_refinement:
1880 *   filename = "solution-adaptive";
1881 *   break;
1882 *   default:
1883 *   DEAL_II_ASSERT_UNREACHABLE();
1884 *   }
1885 *  
1886 *   std::string face_out(filename);
1887 *   face_out += "-face";
1888 *  
1889 *   filename += "-q" + Utilities::int_to_string(fe.degree, 1);
1890 *   filename += "-" + Utilities::int_to_string(cycle, 2);
1891 *   filename += ".vtk";
1892 *   std::ofstream output(filename);
1893 *  
1894 *   DataOut<dim> data_out;
1895 *  
1896 * @endcode
1897 *
1898 * We first define the names and types of the local solution,
1899 * and add the data to @p data_out.
1900 *
1901 * @code
1902 *   std::vector<std::string> names(dim, "gradient");
1903 *   names.emplace_back("solution");
1904 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
1905 *   component_interpretation(
1906 *   dim + 1, DataComponentInterpretation::component_is_part_of_vector);
1907 *   component_interpretation[dim] =
1908 *   DataComponentInterpretation::component_is_scalar;
1909 *   data_out.add_data_vector(dof_handler_local,
1910 *   solution_local,
1911 *   names,
1912 *   component_interpretation);
1913 *  
1914 * @endcode
1915 *
1916 * The second data item we add is the post-processed solution.
1917 * In this case, it is a single scalar variable belonging to
1918 * a different DoFHandler.
1919 *
1920 * @code
1921 *   std::vector<std::string> post_name(1, "u_post");
1922 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
1923 *   post_comp_type(1, DataComponentInterpretation::component_is_scalar);
1924 *   data_out.add_data_vector(dof_handler_u_post,
1925 *   solution_u_post,
1926 *   post_name,
1927 *   post_comp_type);
1928 *  
1929 *   data_out.build_patches(fe.degree);
1930 *   data_out.write_vtk(output);
1931 *  
1932 *   face_out += "-q" + Utilities::int_to_string(fe.degree, 1);
1933 *   face_out += "-" + Utilities::int_to_string(cycle, 2);
1934 *   face_out += ".vtk";
1935 *   std::ofstream face_output(face_out);
1936 *  
1937 * @endcode
1938 *
1939 * The <code>DataOutFaces</code> class works analogously to the
1940 * <code>DataOut</code> class when we have a <code>DoFHandler</code> that
1941 * defines the solution on the skeleton of the triangulation. We treat it
1942 * as such here, and the code is similar to that above.
1943 *
1944 * @code
1945 *   DataOutFaces<dim> data_out_face(false);
1946 *   std::vector<std::string> face_name(1, "u_hat");
1947 *   std::vector<DataComponentInterpretation::DataComponentInterpretation>
1948 *   face_component_type(1, DataComponentInterpretation::component_is_scalar);
1949 *  
1950 *   data_out_face.add_data_vector(dof_handler,
1951 *   solution,
1952 *   face_name,
1953 *   face_component_type);
1954 *  
1955 *   data_out_face.build_patches(fe.degree);
1956 *   data_out_face.write_vtk(face_output);
1957 *   }
1958 *  
1959 * @endcode
1960 *
1961 *
1962 * <a name="step_51-HDGrefine_grid"></a>
1963 * <h4>HDG::refine_grid</h4>
1964 *
1965
1966 *
1967 * We implement two different refinement cases for HDG, just as in
1968 * <code>@ref step_7 "step-7"</code>: adaptive_refinement and global_refinement. The
1969 * global_refinement option recreates the entire triangulation every
1970 * time. This is because we want to use a finer sequence of meshes than what
1971 * we would get with one refinement step, namely 2, 3, 4, 6, 8, 12, 16, ...
1972 * elements per direction.
1973 *
1974
1975 *
1976 * The adaptive_refinement mode uses the <code>KellyErrorEstimator</code> to
1977 * give a decent indication of the non-regular regions in the scalar local
1978 * solutions.
1979 *
1980 * @code
1981 *   template <int dim>
1982 *   void HDG<dim>::refine_grid(const unsigned int cycle)
1983 *   {
1984 *   if (cycle == 0)
1985 *   {
1986 *   GridGenerator::subdivided_hyper_cube(triangulation, 2, -1, 1);
1987 *   triangulation.refine_global(3 - dim);
1988 *   }
1989 *   else
1990 *   switch (refinement_mode)
1991 *   {
1992 *   case global_refinement:
1993 *   {
1994 *   triangulation.clear();
1995 *   GridGenerator::subdivided_hyper_cube(triangulation,
1996 *   2 + (cycle % 2),
1997 *   -1,
1998 *   1);
1999 *   triangulation.refine_global(3 - dim + cycle / 2);
2000 *   break;
2001 *   }
2002 *  
2003 *   case adaptive_refinement:
2004 *   {
2005 *   Vector<float> estimated_error_per_cell(
2006 *   triangulation.n_active_cells());
2007 *  
2008 *   const FEValuesExtractors::Scalar scalar(dim);
2009 *   std::map<types::boundary_id, const Function<dim> *>
2010 *   neumann_boundary;
2011 *   KellyErrorEstimator<dim>::estimate(dof_handler_local,
2012 *   QGauss<dim - 1>(fe.degree + 1),
2013 *   neumann_boundary,
2014 *   solution_local,
2015 *   estimated_error_per_cell,
2016 *   fe_local.component_mask(
2017 *   scalar));
2018 *  
2019 *   GridRefinement::refine_and_coarsen_fixed_number(
2020 *   triangulation, estimated_error_per_cell, 0.3, 0.);
2021 *  
2022 *   triangulation.execute_coarsening_and_refinement();
2023 *  
2024 *   break;
2025 *   }
2026 *  
2027 *   default:
2028 *   {
2029 *   DEAL_II_ASSERT_UNREACHABLE();
2030 *   }
2031 *   }
2032 *  
2033 * @endcode
2034 *
2035 * Just as in @ref step_7 "step-7", we set the boundary indicator of two of the faces to 1
2036 * where we want to specify Neumann boundary conditions instead of Dirichlet
2037 * conditions. Since we re-create the triangulation every time for global
2038 * refinement, the flags are set in every refinement step, not just at the
2039 * beginning.
2040 *
2041 * @code
2042 *   for (const auto &cell : triangulation.cell_iterators())
2043 *   for (const auto &face : cell->face_iterators())
2044 *   if (face->at_boundary())
2045 *   if ((std::fabs(face->center()[0] - (-1)) < 1e-12) ||
2046 *   (std::fabs(face->center()[1] - (-1)) < 1e-12))
2047 *   face->set_boundary_id(1);
2048 *   }
2049 *  
2050 * @endcode
2051 *
2052 *
2053 * <a name="step_51-HDGrun"></a>
2054 * <h4>HDG::run</h4>
2055 * The functionality here is basically the same as <code>@ref step_7 "step-7"</code>.
2056 * We loop over 10 cycles, refining the grid on each one. At the end,
2057 * convergence tables are created.
2058 *
2059 * @code
2060 *   template <int dim>
2061 *   void HDG<dim>::run()
2062 *   {
2063 *   for (unsigned int cycle = 0; cycle < 10; ++cycle)
2064 *   {
2065 *   std::cout << "Cycle " << cycle << ':' << std::endl;
2066 *  
2067 *   refine_grid(cycle);
2068 *   setup_system();
2069 *   assemble_system(false);
2070 *   solve();
2071 *   postprocess();
2072 *   output_results(cycle);
2073 *   }
2074 *  
2075 * @endcode
2076 *
2077 * There is one minor change for the convergence table compared to @ref step_7 "step-7":
2078 * Since we did not refine our mesh by a factor two in each cycle (but
2079 * rather used the sequence 2, 3, 4, 6, 8, 12, ...), we need to tell the
2080 * convergence rate evaluation about this. We do this by setting the
2081 * number of cells as a reference column and additionally specifying the
2082 * dimension of the problem, which gives the necessary information for the
2083 * relation between number of cells and mesh size.
2084 *
2085 * @code
2086 *   if (refinement_mode == global_refinement)
2087 *   {
2088 *   convergence_table.evaluate_convergence_rates(
2089 *   "val L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
2090 *   convergence_table.evaluate_convergence_rates(
2091 *   "grad L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
2092 *   convergence_table.evaluate_convergence_rates(
2093 *   "val L2-post", "cells", ConvergenceTable::reduction_rate_log2, dim);
2094 *   }
2095 *   convergence_table.write_text(std::cout);
2096 *   }
2097 *  
2098 *   } // end of namespace Step51
2099 *  
2100 *  
2101 *  
2102 *   int main()
2103 *   {
2104 *   const unsigned int dim = 2;
2105 *  
2106 *   try
2107 *   {
2108 * @endcode
2109 *
2110 * Now for the three calls to the main class in complete analogy to
2111 * @ref step_7 "step-7".
2112 *
2113 * @code
2114 *   {
2115 *   std::cout << "Solving with Q1 elements, adaptive refinement"
2116 *   << std::endl
2117 *   << "============================================="
2118 *   << std::endl
2119 *   << std::endl;
2120 *  
2121 *   Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::adaptive_refinement);
2122 *   hdg_problem.run();
2123 *  
2124 *   std::cout << std::endl;
2125 *   }
2126 *  
2127 *   {
2128 *   std::cout << "Solving with Q1 elements, global refinement" << std::endl
2129 *   << "===========================================" << std::endl
2130 *   << std::endl;
2131 *  
2132 *   Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::global_refinement);
2133 *   hdg_problem.run();
2134 *  
2135 *   std::cout << std::endl;
2136 *   }
2137 *  
2138 *   {
2139 *   std::cout << "Solving with Q3 elements, global refinement" << std::endl
2140 *   << "===========================================" << std::endl
2141 *   << std::endl;
2142 *  
2143 *   Step51::HDG<dim> hdg_problem(3, Step51::HDG<dim>::global_refinement);
2144 *   hdg_problem.run();
2145 *  
2146 *   std::cout << std::endl;
2147 *   }
2148 *   }
2149 *   catch (std::exception &exc)
2150 *   {
2151 *   std::cerr << std::endl
2152 *   << std::endl
2153 *   << "----------------------------------------------------"
2154 *   << std::endl;
2155 *   std::cerr << "Exception on processing: " << std::endl
2156 *   << exc.what() << std::endl
2157 *   << "Aborting!" << std::endl
2158 *   << "----------------------------------------------------"
2159 *   << std::endl;
2160 *   return 1;
2161 *   }
2162 *   catch (...)
2163 *   {
2164 *   std::cerr << std::endl
2165 *   << std::endl
2166 *   << "----------------------------------------------------"
2167 *   << std::endl;
2168 *   std::cerr << "Unknown exception!" << std::endl
2169 *   << "Aborting!" << std::endl
2170 *   << "----------------------------------------------------"
2171 *   << std::endl;
2172 *   return 1;
2173 *   }
2174 *  
2175 *   return 0;
2176 *   }
2177 * @endcode
2178<a name="step_51-Results"></a><h1>Results</h1>
2179
2180
2181<a name="step_51-Programoutput"></a><h3>Program output</h3>
2182
2183
2184We first have a look at the output generated by the program when run in 2D. In
2185the four images below, we show the solution for polynomial degree @f$p=1@f$
2186and cycles 2, 3, 4, and 8 of the program. In the plots, we overlay the data
2187generated from the internal data (DG part) with the skeleton part (@f$\hat{u}@f$)
2188into the same plot. We had to generate two different data sets because cells
2189and faces represent different geometric entities, the combination of which (in
2190the same file) is not supported in the VTK output of deal.II.
2191
2192The images show the distinctive features of HDG: The cell solution (colored
2193surfaces) is discontinuous between the cells. The solution on the skeleton
2194variable sits on the faces and ties together the local parts. The skeleton
2195solution is not continuous on the vertices where the faces meet, even though
2196its values are quite close along lines in the same coordinate direction. The
2197skeleton solution can be interpreted as a rubber spring between the two sides
2198that balances the jumps in the solution (or rather, the flux @f$\kappa \nabla u
2199+ \mathbf{c} u@f$). From the picture at the top left, it is clear that
2200the bulk solution frequently over- and undershoots and that the
2201skeleton variable is indeed a better approximation to the exact
2202solution; this explains why we can get a better solution using a
2203postprocessing step.
2204
2205As the mesh is refined, the jumps between the cells get
2206small (we represent a smooth solution), and the skeleton solution approaches
2207the interior parts. For cycle 8, there is no visible difference in the two
2208variables. We also see how boundary conditions are implemented weakly and that
2209the interior variables do not exactly satisfy boundary conditions. On the
2210lower and left boundaries, we set Neumann boundary conditions, whereas we set
2211Dirichlet conditions on the right and top boundaries.
2212
2213<table align="center">
2214 <tr>
2215 <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_2.png" alt=""></td>
2216 <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_3.png" alt=""></td>
2217 </tr>
2218 <tr>
2219 <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_4.png" alt=""></td>
2220 <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_8.png" alt=""></td>
2221 </tr>
2222</table>
2223
2224Next, we have a look at the post-processed solution, again at cycles 2, 3, 4,
2225and 8. This is a discontinuous solution that is locally described by second
2226order polynomials. While the solution does not look very good on the mesh of
2227cycle two, it looks much better for cycles three and four. As shown by the
2228convergence table below, we find that it also converges more quickly to the
2229analytical solution.
2230
2231<table align="center">
2232 <tr>
2233 <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_2.png" alt=""></td>
2234 <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_3.png" alt=""></td>
2235 </tr>
2236 <tr>
2237 <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_4.png" alt=""></td>
2238 <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_8.png" alt=""></td>
2239 </tr>
2240</table>
2241
2242Finally, we look at the solution for @f$p=3@f$ at cycle 2. Despite the coarse
2243mesh with only 64 cells, the post-processed solution is similar in quality
2244to the linear solution (not post-processed) at cycle 8 with 4,096
2245cells. This clearly shows the superiority of high order methods for smooth
2246solutions.
2247
2248<table align="center">
2249 <tr>
2250 <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_q3_2.png" alt=""></td>
2251 <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_q3_2.png" alt=""></td>
2252 </tr>
2253</table>
2254
2255<a name="step_51-Convergencetables"></a><h4>Convergence tables</h4>
2256
2257
2258When the program is run, it also outputs information about the respective
2259steps and convergence tables with errors in the various components in the
2260end. In 2D, the convergence tables look the following:
2261
2262@code
2263Q1 elements, adaptive refinement:
2264cells dofs val L2 grad L2 val L2-post
2265 16 80 1.804e+01 2.207e+01 1.798e+01
2266 31 170 9.874e+00 1.322e+01 9.798e+00
2267 61 314 7.452e-01 3.793e+00 4.891e-01
2268 121 634 3.240e-01 1.511e+00 2.616e-01
2269 238 1198 8.585e-02 8.212e-01 1.808e-02
2270 454 2290 4.802e-02 5.178e-01 2.195e-02
2271 898 4378 2.561e-02 2.947e-01 4.318e-03
2272 1720 7864 1.306e-02 1.664e-01 2.978e-03
2273 3271 14638 7.025e-03 9.815e-02 1.075e-03
2274 6217 27214 4.119e-03 6.407e-02 9.975e-04
2275
2276Q1 elements, global refinement:
2277cells dofs val L2 grad L2 val L2-post
2278 16 80 1.804e+01 - 2.207e+01 - 1.798e+01 -
2279 36 168 6.125e+00 2.66 9.472e+00 2.09 6.084e+00 2.67
2280 64 288 9.785e-01 6.38 4.260e+00 2.78 7.102e-01 7.47
2281 144 624 2.730e-01 3.15 1.866e+00 2.04 6.115e-02 6.05
2282 256 1088 1.493e-01 2.10 1.046e+00 2.01 2.880e-02 2.62
2283 576 2400 6.965e-02 1.88 4.846e-01 1.90 9.204e-03 2.81
2284 1024 4224 4.018e-02 1.91 2.784e-01 1.93 4.027e-03 2.87
2285 2304 9408 1.831e-02 1.94 1.264e-01 1.95 1.236e-03 2.91
2286 4096 16640 1.043e-02 1.96 7.185e-02 1.96 5.306e-04 2.94
2287 9216 37248 4.690e-03 1.97 3.228e-02 1.97 1.599e-04 2.96
2288
2289Q3 elements, global refinement:
2290cells dofs val L2 grad L2 val L2-post
2291 16 160 3.613e-01 - 1.891e+00 - 3.020e-01 -
2292 36 336 6.411e-02 4.26 5.081e-01 3.24 3.238e-02 5.51
2293 64 576 3.480e-02 2.12 2.533e-01 2.42 5.277e-03 6.31
2294 144 1248 8.297e-03 3.54 5.924e-02 3.58 6.330e-04 5.23
2295 256 2176 2.254e-03 4.53 1.636e-02 4.47 1.403e-04 5.24
2296 576 4800 4.558e-04 3.94 3.277e-03 3.96 1.844e-05 5.01
2297 1024 8448 1.471e-04 3.93 1.052e-03 3.95 4.378e-06 5.00
2298 2304 18816 2.956e-05 3.96 2.104e-04 3.97 5.750e-07 5.01
2299 4096 33280 9.428e-06 3.97 6.697e-05 3.98 1.362e-07 5.01
2300 9216 74496 1.876e-06 3.98 1.330e-05 3.99 1.788e-08 5.01
2301@endcode
2302
2303
2304One can see the error reduction upon grid refinement, and for the cases where
2305global refinement was performed, also the convergence rates. The quadratic
2306convergence rates of Q1 elements in the @f$L_2@f$ norm for both the scalar
2307variable and the gradient variable is apparent, as is the cubic rate for the
2308postprocessed scalar variable in the @f$L_2@f$ norm. Note this distinctive
2309feature of an HDG solution. In typical continuous finite elements, the
2310gradient of the solution of order @f$p@f$ converges at rate @f$p@f$ only, as
2311opposed to @f$p+1@f$ for the actual solution. Even though superconvergence
2312results for finite elements are also available (e.g. superconvergent patch
2313recovery first introduced by Zienkiewicz and Zhu), these are typically limited
2314to structured meshes and other special cases. For Q3 HDG variables, the scalar
2315variable and gradient converge at fourth order and the postprocessed scalar
2316variable at fifth order.
2317
2318The same convergence rates are observed in 3d.
2319@code
2320Q1 elements, adaptive refinement:
2321cells dofs val L2 grad L2 val L2-post
2322 8 144 7.122e+00 1.941e+01 6.102e+00
2323 29 500 3.309e+00 1.023e+01 2.145e+00
2324 113 1792 2.204e+00 1.023e+01 1.912e+00
2325 379 5732 6.085e-01 5.008e+00 2.233e-01
2326 1317 19412 1.543e-01 1.464e+00 4.196e-02
2327 4579 64768 5.058e-02 5.611e-01 9.521e-03
2328 14596 199552 2.129e-02 3.122e-01 4.569e-03
2329 46180 611400 1.033e-02 1.622e-01 1.684e-03
2330144859 1864212 5.007e-03 8.371e-02 7.364e-04
2331451060 5684508 2.518e-03 4.562e-02 3.070e-04
2332
2333Q1 elements, global refinement:
2334cells dofs val L2 grad L2 val L2-post
2335 8 144 7.122e+00 - 1.941e+01 - 6.102e+00 -
2336 27 432 5.491e+00 0.64 2.184e+01 -0.29 4.448e+00 0.78
2337 64 960 3.646e+00 1.42 1.299e+01 1.81 3.306e+00 1.03
2338 216 3024 1.595e+00 2.04 8.550e+00 1.03 1.441e+00 2.05
2339 512 6912 6.922e-01 2.90 5.306e+00 1.66 2.511e-01 6.07
2340 1728 22464 2.915e-01 2.13 2.490e+00 1.87 8.588e-02 2.65
2341 4096 52224 1.684e-01 1.91 1.453e+00 1.87 4.055e-02 2.61
2342 13824 172800 7.972e-02 1.84 6.861e-01 1.85 1.335e-02 2.74
2343 32768 405504 4.637e-02 1.88 3.984e-01 1.89 5.932e-03 2.82
2344110592 1354752 2.133e-02 1.92 1.830e-01 1.92 1.851e-03 2.87
2345
2346Q3 elements, global refinement:
2347cells dofs val L2 grad L2 val L2-post
2348 8 576 5.670e+00 - 1.868e+01 - 5.462e+00 -
2349 27 1728 1.048e+00 4.16 6.988e+00 2.42 8.011e-01 4.73
2350 64 3840 2.831e-01 4.55 2.710e+00 3.29 1.363e-01 6.16
2351 216 12096 7.883e-02 3.15 7.721e-01 3.10 2.158e-02 4.55
2352 512 27648 3.642e-02 2.68 3.305e-01 2.95 5.231e-03 4.93
2353 1728 89856 8.546e-03 3.58 7.581e-02 3.63 7.640e-04 4.74
2354 4096 208896 2.598e-03 4.14 2.313e-02 4.13 1.783e-04 5.06
2355 13824 691200 5.314e-04 3.91 4.697e-03 3.93 2.355e-05 4.99
2356 32768 1622016 1.723e-04 3.91 1.517e-03 3.93 5.602e-06 4.99
2357110592 5419008 3.482e-05 3.94 3.055e-04 3.95 7.374e-07 5.00
2358@endcode
2359
2360<a name="step_51-Comparisonwithcontinuousfiniteelements"></a><h3>Comparison with continuous finite elements</h3>
2361
2362
2363<a name="step_51-Resultsfor2D"></a><h4>Results for 2D</h4>
2364
2365
2366The convergence tables verify the expected convergence rates stated in the
2367introduction. Now, we want to show a quick comparison of the computational
2368efficiency of the HDG method compared to a usual finite element (continuous
2369Galerkin) method on the problem of this tutorial. Of course, stability aspects
2370of the HDG method compared to continuous finite elements for
2371transport-dominated problems are also important in practice, which is an
2372aspect not seen on a problem with smooth analytic solution. In the picture
2373below, we compare the @f$L_2@f$ error as a function of the number of degrees of
2374freedom (left) and of the computing time spent in the linear solver (right)
2375for two space dimensions of continuous finite elements (CG) and the hybridized
2376discontinuous Galerkin method presented in this tutorial. As opposed to the
2377tutorial where we only use unpreconditioned BiCGStab, the times shown in the
2378figures below use the Trilinos algebraic multigrid preconditioner in
2379TrilinosWrappers::PreconditionAMG. For the HDG part, a wrapper around
2380ChunkSparseMatrix for the trace variable has been used in order to utilize the
2381block structure in the matrix on the finest level.
2382
2383<table align="center">
2384 <tr>
2385 <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_plain.png" width="400" alt=""></td>
2386 <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_plain.png" width="400" alt=""></td>
2387 </tr>
2388</table>
2389
2390The results in the graphs show that the HDG method is slower than continuous
2391finite elements at @f$p=1@f$, about equally fast for cubic elements and
2392faster for sixth order elements. However, we have seen above that the HDG
2393method actually produces solutions which are more accurate than what is
2394represented in the original variables. Therefore, in the next two plots below
2395we instead display the error of the post-processed solution for HDG (denoted
2396by @f$p=1^*@f$ for example). We now see a clear advantage of HDG for the same
2397amount of work for both @f$p=3@f$ and @f$p=6@f$, and about the same quality
2398for @f$p=1@f$.
2399
2400<table align="center">
2401 <tr>
2402 <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_post.png" width="400" alt=""></td>
2403 <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_post.png" width="400" alt=""></td>
2404 </tr>
2405</table>
2406
2407Since the HDG method actually produces results converging as
2408@f$h^{p+2}@f$, we should compare it to a continuous Galerkin
2409solution with the same asymptotic convergence behavior, i.e., FE_Q with degree
2410@f$p+1@f$. If we do this, we get the convergence curves below. We see that
2411CG with second order polynomials is again clearly better than HDG with
2412linears. However, the advantage of HDG for higher orders remains.
2413
2414<table align="center">
2415 <tr>
2416 <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_postb.png" width="400" alt=""></td>
2417 <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_postb.png" width="400" alt=""></td>
2418 </tr>
2419</table>
2420
2421The results are in line with properties of DG methods in general: Best
2422performance is typically not achieved for linear elements, but rather at
2423somewhat higher order, usually around @f$p=3@f$. This is because of a
2424volume-to-surface effect for discontinuous solutions with too much of the
2425solution living on the surfaces and hence duplicating work when the elements
2426are linear. Put in other words, DG methods are often most efficient when used
2427at relatively high order, despite their focus on a discontinuous (and hence,
2428seemingly low accurate) representation of solutions.
2429
2430<a name="step_51-Resultsfor3D"></a><h4>Results for 3D</h4>
2431
2432
2433We now show the same figures in 3D: The first row shows the number of degrees
2434of freedom and computing time versus the @f$L_2@f$ error in the scalar variable
2435@f$u@f$ for CG and HDG at order @f$p@f$, the second row shows the
2436post-processed HDG solution instead of the original one, and the third row
2437compares the post-processed HDG solution with CG at order @f$p+1@f$. In 3D,
2438the volume-to-surface effect makes the cost of HDG somewhat higher and the CG
2439solution is clearly better than HDG for linears by any metric. For cubics, HDG
2440and CG are of similar quality, whereas HDG is again more efficient for sixth
2441order polynomials. One can alternatively also use the combination of FE_DGP
2442and FE_FaceP instead of (FE_DGQ, FE_FaceQ), which do not use tensor product
2443polynomials of degree @f$p@f$ but Legendre polynomials of <i>complete</i>
2444degree @f$p@f$. There are fewer degrees of freedom on the skeleton variable
2445for FE_FaceP for a given mesh size, but the solution quality (error vs. number
2446of DoFs) is very similar to the results for FE_FaceQ.
2447
2448<table align="center">
2449 <tr>
2450 <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_plain.png" width="400" alt=""></td>
2451 <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_plain.png" width="400" alt=""></td>
2452 </tr>
2453 <tr>
2454 <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_post.png" width="400" alt=""></td>
2455 <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_post.png" width="400" alt=""></td>
2456 </tr>
2457 <tr>
2458 <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_postb.png" width="400" alt=""></td>
2459 <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_postb.png" width="400" alt=""></td>
2460 </tr>
2461</table>
2462
2463One final note on the efficiency comparison: We tried to use general-purpose
2464sparse matrix structures and similar solvers (optimal AMG preconditioners for
2465both without particular tuning of the AMG parameters on any of them) to give a
2466fair picture of the cost versus accuracy of two methods, on a toy example. It
2467should be noted however that geometric multigrid (GMG) for continuous finite
2468elements is about a factor four to five faster for @f$p=3@f$ and @f$p=6@f$. As of
24692019, optimal-complexity iterative solvers for HDG are still under development
2470in the research community. Also, there are other implementation aspects for CG
2471available such as fast matrix-free approaches as shown in @ref step_37 "step-37" that make
2472higher order continuous elements more competitive. Again, it is not clear to
2473the authors of the tutorial whether similar improvements could be made for
2474HDG. We refer to <a href="https://dx.doi.org/10.1137/16M110455X">Kronbichler
2475and Wall (2018)</a> for a recent efficiency evaluation.
2476
2477
2478<a name="step_51-Possibilitiesforimprovements"></a><h3>Possibilities for improvements</h3>
2479
2480
2481As already mentioned in the introduction, one possibility is to implement
2482another post-processing technique as discussed in the literature.
2483
2484A second item that is not done optimally relates to the performance of this
2485program, which is of course an issue in practical applications (weighing in
2486also the better solution quality of (H)DG methods for transport-dominated
2487problems). Let us look at
2488the computing time of the tutorial program and the share of the individual
2489components:
2490
2491<table align="center" class="doxtable">
2492 <tr>
2493 <th>&nbsp;</th>
2494 <th>&nbsp;</th>
2495 <th>Setup</th>
2496 <th>Assemble</th>
2497 <th>Solve</th>
2498 <th>Trace reconstruct</th>
2499 <th>Post-processing</th>
2500 <th>Output</th>
2501 </tr>
2502 <tr>
2503 <th>&nbsp;</th>
2504 <th>Total time</th>
2505 <th colspan="6">Relative share</th>
2506 </tr>
2507 <tr>
2508 <td align="left">2D, Q1, cycle 9, 37,248 dofs</td>
2509 <td align="center">5.34s</td>
2510 <td align="center">0.7%</td>
2511 <td align="center">1.2%</td>
2512 <td align="center">89.5%</td>
2513 <td align="center">0.9%</td>
2514 <td align="center">2.3%</td>
2515 <td align="center">5.4%</td>
2516 </tr>
2517 <tr>
2518 <td align="left">2D, Q3, cycle 9, 74,496 dofs</td>
2519 <td align="center">22.2s</td>
2520 <td align="center">0.4%</td>
2521 <td align="center">4.3%</td>
2522 <td align="center">84.1%</td>
2523 <td align="center">4.1%</td>
2524 <td align="center">3.5%</td>
2525 <td align="center">3.6%</td>
2526 </tr>
2527 <tr>
2528 <td align="left">3D, Q1, cycle 7, 172,800 dofs</td>
2529 <td align="center">9.06s</td>
2530 <td align="center">3.1%</td>
2531 <td align="center">8.9%</td>
2532 <td align="center">42.7%</td>
2533 <td align="center">7.0%</td>
2534 <td align="center">20.6%</td>
2535 <td align="center">17.7%</td>
2536 </tr>
2537 <tr>
2538 <td align="left">3D, Q3, cycle 7, 691,200 dofs</td>
2539 <td align="center">516s</td>
2540 <td align="center">0.6%</td>
2541 <td align="center">34.5%</td>
2542 <td align="center">13.4%</td>
2543 <td align="center">32.8%</td>
2544 <td align="center">17.1%</td>
2545 <td align="center">1.5%</td>
2546 </tr>
2547</table>
2548
2549As can be seen from the table, the solver and assembly calls dominate the
2550runtime of the program. This also gives a clear indication of where
2551improvements would make the most sense:
2552
2553<ol>
2554 <li> Better linear solvers: We use a BiCGStab iterative solver without
2555 preconditioner, where the number of iterations increases with increasing
2556 problem size (the number of iterations for Q1 elements and global
2557 refinements starts at 35 for the small sizes but increases up to 701 for the
2558 largest size). To do better, one could for example use an algebraic
2559 multigrid preconditioner from Trilinos, or some more advanced variants as
2560 the one discussed in <a
2561 href="https://dx.doi.org/10.1137/16M110455X">Kronbichler and Wall
2562 (2018)</a>. For diffusion-dominated problems such as the problem at hand
2563 with finer meshes, such a solver can be designed that uses the matrix-vector
2564 products from the more efficient ChunkSparseMatrix on the finest level, as
2565 long as we are not working in parallel with MPI. For MPI-parallelized
2566 computations, a standard TrilinosWrappers::SparseMatrix can be used.
2567
2568 <li> Speed up assembly by pre-assembling parts that do not change from one
2569 cell to another (those that contain neither variable coefficients nor
2570 mapping-dependent terms).
2571</ol>
2572 *
2573 *
2574<a name="step_51-PlainProg"></a>
2575<h1> The plain program</h1>
2576@include "step-51.cc"
2577*/
Definition fe_q.h:554
void mmult(FullMatrix< number2 > &C, const FullMatrix< number2 > &B, const bool adding=false) const
virtual void vector_value(const Point< dim > &p, Vector< RangeNumberType > &values) const
static void estimate(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const Quadrature< dim - 1 > &quadrature, const std::map< types::boundary_id, const Function< spacedim, Number > * > &neumann_bc, const ReadVector< Number > &solution, Vector< float > &error, const ComponentMask &component_mask={}, const Function< spacedim > *coefficients=nullptr, const unsigned int n_threads=numbers::invalid_unsigned_int, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id, const types::material_id material_id=numbers::invalid_material_id, const Strategy strategy=cell_diameter_over_24)
Definition point.h:113
TensorFunction(const time_type initial_time=time_type(0.0))
const unsigned int DoFAccessor< structdim, dim, spacedim, level_dof_access >::dimension
#define DEAL_II_NOT_IMPLEMENTED()
#define AssertDimension(dim1, dim2)
typename ActiveSelector::active_cell_iterator active_cell_iterator
void loop(IteratorType begin, std_cxx20::type_identity_t< IteratorType > end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(std_cxx20::type_identity_t< DOFINFO > &, std_cxx20::type_identity_t< DOFINFO > &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, AssemblerType &assembler, const LoopControl &lctrl=LoopControl())
Definition loop.h:564
const bool IsBlockVector< VectorType >::value
void make_hanging_node_constraints(const DoFHandler< dim, spacedim > &dof_handler, AffineConstraints< number > &constraints)
void make_sparsity_pattern(const DoFHandler< dim, spacedim > &dof_handler, SparsityPatternBase &sparsity_pattern, const AffineConstraints< number > &constraints={}, const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
UpdateFlags
@ update_values
Shape function values.
@ update_normal_vectors
Normal vectors.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
MappingQ< dim, spacedim > StaticMappingQ1< dim, spacedim >::mapping
Definition mapping_q1.h:104
Expression sign(const Expression &x)
void subdivided_hyper_cube(Triangulation< dim, spacedim > &tria, const unsigned int repetitions, const double left=0., const double right=1., const bool colorize=false)
void refine(Triangulation< dim, spacedim > &tria, const Vector< Number > &criteria, const double threshold, const unsigned int max_to_mark=numbers::invalid_unsigned_int)
void refine_and_coarsen_fixed_number(Triangulation< dim, spacedim > &triangulation, const Vector< Number > &criteria, const double top_fraction_of_cells, const double bottom_fraction_of_cells, const unsigned int max_n_cells=std::numeric_limits< unsigned int >::max())
void scale(const double scaling_factor, Triangulation< dim, spacedim > &triangulation)
double volume(const Triangulation< dim, spacedim > &tria)
constexpr char O
@ matrix
Contents is actually a matrix.
@ general
No special properties.
constexpr char T
constexpr types::blas_int zero
constexpr types::blas_int one
void cell_matrix(FullMatrix< double > &M, const FEValuesBase< dim > &fe, const FEValuesBase< dim > &fetest, const ArrayView< const std::vector< double > > &velocity, const double factor=1.)
Definition advection.h:74
double norm(const FEValuesBase< dim > &fe, const ArrayView< const std::vector< Tensor< 1, dim > > > &Du)
Definition divergence.h:471
void L2(Vector< number > &result, const FEValuesBase< dim > &fe, const std::vector< double > &input, const double factor=1.)
Definition l2.h:159
SymmetricTensor< 2, dim, Number > C(const Tensor< 2, dim, Number > &F)
Tensor< 2, dim, Number > w(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
void apply(const Kokkos::TeamPolicy< MemorySpace::Default::kokkos_space::execution_space >::member_type &team_member, const Kokkos::View< Number *, MemorySpace::Default::kokkos_space > shape_data, const ViewTypeIn in, ViewTypeOut out)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
VectorType::value_type * end(VectorType &V)
T sum(const T &t, const MPI_Comm mpi_communicator)
std::string int_to_string(const unsigned int value, const unsigned int digits=numbers::invalid_unsigned_int)
Definition utilities.cc:466
double compute_global_error(const Triangulation< dim, spacedim > &tria, const InVector &cellwise_error, const NormType &norm, const double exponent=2.)
void integrate_difference(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const ReadVector< Number > &fe_function, const Function< spacedim, Number > &exact_solution, OutVector &difference, const Quadrature< dim > &q, const NormType &norm, const Function< spacedim, double > *weight=nullptr, const double exponent=2.)
void project_boundary_values(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const std::map< types::boundary_id, const Function< spacedim, number > * > &boundary_functions, const Quadrature< dim - 1 > &q, std::map< types::global_dof_index, number > &boundary_values, std::vector< unsigned int > component_mapping={})
void run(const Iterator &begin, const std_cxx20::type_identity_t< Iterator > &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
void run(const std::vector< std::vector< Iterator > > &colored_iterators, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length=2 *MultithreadInfo::n_threads(), const unsigned int chunk_size=8)
void copy(const T *begin, const T *end, U *dest)
int(& functions)(const void *v1, const void *v2)
void assemble(const MeshWorker::DoFInfoBox< dim, DOFINFO > &dinfo, A *assembler)
Definition loop.h:70
void reinit(MatrixBlock< MatrixType > &v, const BlockSparsityPattern &p)
void set_dof_values(const DoFCellAccessor< dim, spacedim, lda > &cell, const Vector< number > &local_values, OutputVector &values, const bool perform_check)
constexpr double PI
Definition numbers.h:239
::VectorizedArray< Number, width > exp(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &, const Number p)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
Definition types.h:32
static constexpr unsigned int faces_per_cell
static std_cxx20::ranges::iota_view< unsigned int, unsigned int > face_indices()
DEAL_II_HOST constexpr SymmetricTensor< 2, dim, Number > invert(const SymmetricTensor< 2, dim, Number > &)
DEAL_II_HOST constexpr Number trace(const SymmetricTensor< 2, dim2, Number > &)
std_cxx20::type_identity< T > identity