1#ifndef DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
2#define DUNE_FEM_BASEFUNCTIONSETS_CODEGEN_HH
13#include <dune/common/exceptions.hh>
34 std::string outFileName_;
42 typedef std::set< int > DimRangeSetType;
43 typedef std::set< void* > BaseSetPointerType;
44 DimRangeSetType savedimRanges_;
45 mutable DimRangeSetType dimRanges_;
46 BaseSetPointerType savedBaseSets_;
48 typedef void codegenfunc_t (std::ostream&
out,
52 const size_t numCols );
54 typedef std::pair< std::string, int > EntryType;
55 std::vector< EntryType > filenames_;
57 typedef std::vector< int > KeyType;
58 typedef std::set< KeyType > KeyStorageType;
60 typedef std::pair< int, std::pair< int, int > > EvalPairType ;
61 typedef std::set< EvalPairType > EvalSetType ;
63 std::map< codegenfunc_t* , KeyStorageType > entryMap_;
68 : path_(
Dune::Fem::
Parameter::commonInputPath() +
"/autogeneratedcode"),
69 outFileName_(
"autogeneratedcode.hh" ),
70 nopMax_(0), nopMin_(0), baseMax_(0), baseMin_(0),
71 dimRanges_(), savedBaseSets_(), filenames_(),
84 savedBaseSets_.clear();
99 void setFileName(
const std::string& filename ) { outFileName_ = filename ; }
101 template <
class BaseSet>
105 void* ptr = (
void *) baseSet;
106 if( savedBaseSets_.find( ptr ) == savedBaseSets_.end() )
108 savedBaseSets_.insert( ptr );
110 std::cout <<
"Add dimRange " << dimRange << std::endl;
112 dimRanges_.insert( dimRange ) ;
117 codegenfunc_t* codegenfunc,
118 const int dim,
const int dimRange,
const int quadNop,
const int numBase )
120 KeyStorageType& keyMap = entryMap_[ codegenfunc ];
122 typedef KeyStorageType :: iterator iterator;
130 iterator it = keyMap.find( key );
131 if( it != keyMap.end() ) return ;
134 assert( dimRanges_.find( dimRange ) != dimRanges_.end() );
139 std::stringstream filename;
140 filename << fileprefix << dimRange <<
"_" << quadNop <<
"_" << numBase <<
".hh";
143 int pos =
exists( filename.str() );
144 if( pos >= 0 )
return;
146 std::string filenameToWrite( path_ +
"/" + filename.str() );
150 std::stringstream code;
152 codegenfunc( code, dim, dimRange, quadNop, numBase );
154 bool writeCode = false ;
157 std::ifstream infile( filenameToWrite );
160 std::stringstream checkstr;
161 checkstr << infile.rdbuf();
165 if( checkstr.str().compare( code.str() ) != 0 )
182 std::ofstream file( filenameToWrite, std::ios::out );
186 std::cout <<
"Generate code " << fileprefix <<
" for (" << dimRange <<
","
187 << quadNop <<
"," << numBase <<
")" << std::endl;
193 EvalPairType evalPair ( dimRange, std::make_pair(quadNop, numBase) );
194 evalSet_.insert( evalPair );
196 if( baseMin_ == 0 ) baseMin_ = numBase;
197 if( nopMin_ == 0 ) nopMin_ = quadNop;
199 EntryType entry ( filename.str() , dimRange );
200 filenames_.push_back( entry );
201 nopMax_ =
std::max( quadNop, nopMax_ );
202 nopMin_ =
std::min( quadNop, nopMin_ );
203 baseMax_ =
std::max( numBase, baseMax_ );
204 baseMin_ =
std::min( numBase, baseMin_ );
212 std::cerr <<
"All automated code generated, bye, bye !! " << std::endl;
218 bool dumpInfo(
const bool writeToCurrentDir =
false )
const
220 if( writeToCurrentDir )
224 std::ofstream file( outFileName_.c_str() );
225 file <<
"#include \"" <<path_<<
"/" << outFileName_ <<
"\"";
229 std::string filename( path_ +
"/" + outFileName_ );
230 std::stringstream file;
232 file <<
"#ifdef CODEGEN_INCLUDEMAXNUMS" << std::endl;
233 file <<
"#ifndef CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl;
234 file <<
"#define CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
235 file <<
"#define MAX_NUMBER_OF_QUAD_POINTS " << nopMax_ << std::endl;
236 file <<
"#define MIN_NUMBER_OF_QUAD_POINTS " << nopMin_ << std::endl;
237 file <<
"#define MAX_NUMBER_OF_BASE_FCT " << baseMax_ << std::endl;
238 file <<
"#define MIN_NUMBER_OF_BASE_FCT " << baseMin_ << std::endl << std::endl;
240 file <<
"/* include all headers with inner loop extern declarations */" << std::endl;
241 file <<
"#define CODEGEN_COMPILE_INNERLOOPS 1" << std::endl;
242 file <<
"namespace Dune {" << std::endl;
243 file <<
"namespace Fem {" << std::endl;
244 file <<
"namespace Codegen {" << std::endl;
245 for(
size_t i = 0; i < filenames_.size(); ++i )
247 file <<
"#include \""<< filenames_[ i ].first <<
"\"" << std::endl;
249 file <<
"}}} // end namespaces" << std::endl;
250 file <<
"#undef CODEGEN_COMPILE_INNERLOOPS" << std::endl << std::endl;
251 file <<
"#include \"" << filename <<
".c\"" <<std::endl;
254 file <<
"#endif // CODEGEN_INCLUDEMAXNUMS_INCLUDED" << std::endl << std::endl;
255 file <<
"#elif defined CODEGEN_INCLUDEEVALCALLERS" << std::endl;
256 file <<
"#ifndef CODEGEN_EVALCALLERS_INCLUDED" << std::endl;
257 file <<
"#define CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
258 file <<
"namespace Dune {"<< std::endl;
259 file <<
"namespace Fem {"<< std::endl;
260 file <<
"namespace Codegen {"<< std::endl;
261 typedef EvalSetType :: iterator iterator ;
262 const iterator endit = evalSet_.end();
263 for( iterator it = evalSet_.begin(); it != endit; ++it )
265 int dimRange = it->first;
266 int quadNop = it->second.first;
267 int numBase = it->second.second;
268 file <<
" template <class Traits>" << std::endl;
269 file <<
" struct EvaluateImplementation< Traits, " << dimRange <<
" , " << quadNop <<
" , " << numBase <<
" >" << std::endl;
270 file <<
" : public EvaluateRealImplementation< Traits, " << dimRange <<
" , " << quadNop <<
" , " << numBase <<
" >" << std::endl;
271 file <<
" {" << std::endl;
272 file <<
" typedef EvaluateRealImplementation< Traits, " << dimRange <<
" , " << quadNop <<
" , " << numBase <<
" > BaseType;" << std::endl;
273 file <<
" typedef typename BaseType :: RangeVectorType RangeVectorType;" << std::endl;
274 file <<
" EvaluateImplementation( const RangeVectorType& rv ) : BaseType ( rv ) {}" << std::endl;
275 file <<
" };"<< std::endl;
278 file <<
"}}} // end namespaces"<< std::endl;
279 file <<
"#endif // CODEGEN_EVALCALLERS_INCLUDED" << std::endl << std::endl;
280 file <<
"#else" << std::endl << std::endl ;
281 file <<
"#ifndef CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
282 file <<
"#define CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl;
284 file <<
"namespace Dune {" << std::endl;
285 file <<
"namespace Fem {" << std::endl;
286 file <<
"namespace Codegen {" << std::endl;
287 for(
size_t i = 0; i < filenames_.size(); ++i )
289 file <<
"#include \""<< filenames_[ i ].first <<
"\"" << std::endl;
291 file <<
"}}} // end namespaces" << std::endl;
292 file <<
"#endif // CODEGEN_INCLUDE_IMPLEMENTATION" << std::endl << std::endl;
293 file <<
"#endif // CODEGEN_INCLUDEMAXNUMS" << std::endl;
295 std::ifstream infile( filename );
298 std::stringstream checkstr;
299 checkstr << infile.rdbuf();
303 if( checkstr.str().compare( file.str() ) == 0 )
307 std::ofstream outfile( filename );
308 outfile << file.str();
314 std::ofstream Cfile( filename.c_str() );
316 Cfile <<
"#include <stdlib.h>" << std::endl;
317 Cfile <<
"/* include all headers with inner loop implementation */" << std::endl;
318 Cfile <<
"#define CODEGEN_COMPILE_INNERLOOPS 2" << std::endl;
319 for(
size_t i = 0; i < filenames_.size(); ++i )
321 Cfile <<
"#include \""<< filenames_[ i ].first <<
"\"" << std::endl;
328 int exists(
const std::string& filename )
const
330 for(
size_t i = 0; i < filenames_.size(); ++i )
332 if( filename == filenames_[ i ].first )
342 DimRangeSetType found ;
343 bool canAbort = true ;
344 for(
size_t i = 0; i < filenames_.size(); ++i )
346 found.insert( filenames_[ i ].second );
347 if ( filenames_[ i ].second > 0 )
352 typedef DimRangeSetType :: iterator iterator ;
353 for( iterator it = found.begin(); it != found.end(); ++it )
355 dimRanges_.erase( *it );
358 if( canAbort && dimRanges_.size() == 0 )
366 template <
int simdW
idth = 4 >
371 return "__restrict__";
377 return "Dune::Fem::Double";
385 const char* codegenPreCompVar =
"CODEGEN_COMPILE_INNERLOOPS";
388 out <<
"#ifndef HEADERCHECK" << std::endl;
391 else if( stage == 0 )
393 out <<
"#else" << std::endl;
394 out <<
"#if " << codegenPreCompVar <<
" == 1" << std::endl;
395 out <<
"extern \"C\" {" << std::endl
396 <<
" extern inline" << std::endl;
397 out <<
"#endif" << std::endl;
399 else if( stage == 1 )
401 out <<
"#if " << codegenPreCompVar <<
" == 1" << std::endl;
402 out <<
" ;" << std::endl;
403 out <<
"}" << std::endl;
404 out <<
"#else" << std::endl;
410 out <<
"#endif" << std::endl;
416 const int simdW,
const int dimRange,
417 const size_t numRows,
const size_t numCols )
419 std::stringstream funcName;
420 funcName << prefix <<
"_" << simdWidth <<
"_" << dimRange <<
"_" << numRows <<
"_" << numCols ;
421 return funcName.str();
424 static void writeInnerLoopEval(std::ostream&
out,
const int simdW,
const int dimRange,
const size_t numRows,
const size_t numCols )
426 out <<
" // Loop over all quadrature points" << std::endl;
427 out <<
" for(int qp = 0; qp < " << numRows <<
" ; ++qp )" << std::endl;
428 out <<
" {" << std::endl;
440 for(
int i = 0 ; i< simdW ; ++ i )
441 out <<
" const " <<
doubletype() <<
" phi" << i <<
" = base" << i <<
"[ qp ];" << std::endl;
442 for(
int r = 0; r < dimRange; ++ r )
444 out <<
" result" << r <<
"[ qp ] += phi0 * dof0" << r;
445 for(
int i=1; i<simdW; ++i )
446 out <<
" + phi" << i <<
" * dof" << i << r;
447 out <<
" ;" << std::endl;
449 out <<
" }" << std::endl;
455 const size_t numRows,
456 const size_t numCols )
458 const std::string funcName =
463 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
464 out <<
"struct EvaluateRanges<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
465 out <<
"{" << std::endl;
466 out <<
" template< class QuadratureType,"<< std::endl;
467 out <<
" class RangeVectorType," << std::endl;
468 out <<
" class RangeFactorType," << std::endl;
469 out <<
" class LocalDofVectorType>" << std::endl;
470 out <<
" static void eval( const QuadratureType& quad," << std::endl;
471 out <<
" const RangeVectorType& rangeStorageTransposed," << std::endl;
472 out <<
" const LocalDofVectorType& dofs," << std::endl;
473 out <<
" RangeFactorType &rangeVector)" << std::endl;
474 out <<
" {" << std::endl;
480 out <<
" typedef " <<
doubletype() <<
" Field;" << std::endl;
482 out <<
" static thread_local std::vector< Field > memory( " << numRows * dimRange <<
" );" << std::endl;
486 out <<
" Field* resultTmp = memory.data();" << std::endl;
487 out <<
" for( int i=0; i < " << numRows * dimRange <<
"; ++ i ) resultTmp[ i ] = 0;" << std::endl <<std::endl;
489 for(
int r=0; r<dimRange ; ++r )
491 out <<
" Field* result" << r <<
" = resultTmp + " << r * numRows <<
" ;" << std::endl;
495 out <<
" const Field* baseData = rangeStorageTransposed.data();" << std::endl;
501 const size_t simdCols = simdWidth * ( numCols / simdWidth );
504 out <<
" for( int col = 0, dof = 0 ; col < "<< simdCols <<
" ; col += " << simdWidth <<
", dof += " << simdWidth * dimRange<<
" )"<<std::endl;
505 out <<
" {" << std::endl;
520 out <<
" " << funcName <<
"(" << std::endl;
522 for(
int i = 0; i< simdWidth * dimRange; ++i )
523 out <<
" dofs[ dof + " << i <<
" ],";
525 for(
int i=0; i< simdWidth; ++i )
526 out <<
" baseData + ((col + " << i <<
") * " << numRows <<
")," << std::endl;
528 for(
int r = 0; r < dimRange-1; ++r)
529 out <<
"result" << r <<
", ";
530 out <<
"result" << dimRange-1 <<
" );" << std::endl;
531 out <<
" }"<< std::endl;
535 if( numCols > simdCols )
537 out <<
" // remainder iteration" << std::endl;
538 out <<
" for( int col = " << simdCols <<
", dof = " << simdCols * dimRange <<
" ; col < " << numCols <<
" ; ++col )" << std::endl;
539 out <<
" {" << std::endl;
540 for(
int r=0; r<dimRange; ++r )
541 out <<
" const " <<
doubletype() <<
" dof0" << r <<
" = dofs[ dof++ ];" << std::endl;
542 out <<
" const Field* base0 = baseData + (col * " << numRows <<
");" << std::endl;
545 out <<
" }" << std::endl;
549 out <<
" // store result" << std::endl;
550 out <<
" for(int row = 0; row < " << numRows <<
" ; ++row )" << std::endl;
551 out <<
" {" << std::endl;
552 out <<
" auto& result = rangeVector[ row ];" << std::endl;
553 for(
int r = 0 ; r < dimRange; ++ r )
555 out <<
" result[ " << r <<
" ] = result" << r <<
"[ row ];" << std::endl;
557 out <<
" }" << std::endl;
558 out <<
" }" << std::endl << std::endl;
562 out <<
" static void " << funcName <<
"(" << std::endl;
563 for(
int i=0; i<simdWidth; ++i )
566 for(
int r=0; r<dimRange; ++ r )
568 if( r > 0 )
out <<
" ";
573 for(
int i=0; i<simdWidth; ++ i )
575 for(
int r=0; r<dimRange; ++ r )
578 if( r == dimRange-1 )
out <<
" )" << std::endl;
579 else out <<
"," << std::endl;
583 out <<
" {" << std::endl;
585 out <<
" }" << std::endl;
586 out <<
"};" << std::endl;
590 static void writeInnerLoop(std::ostream&
out,
const int simdW,
const int dimRange,
const size_t numCols )
592 for(
int i=0; i< simdW; ++i )
594 for(
int r=0; r< dimRange; ++r )
596 out <<
" const " <<
doubletype() <<
" fac" << i << r <<
" = rangeFactor" << i <<
"[ " << r <<
" ];" << std::endl;
600 out <<
" int rowCol = quad.cachingPoint( row ) * " << numCols <<
";" << std::endl;
601 out <<
" for(int col = 0 ; col < " << numCols <<
" ; ++ col";
604 out <<
" )" << std::endl;
605 out <<
" {" << std::endl;
606 for(
int i = 0 ; i< simdW ; ++ i )
609 out <<
" const " <<
doubletype() <<
" phi" << i <<
" = rangeStorage[ rowCol ][ 0 ];" << std::endl;
611 out <<
" const " <<
doubletype() <<
" phi" << i <<
" = base" << i <<
" [ col ];" << std::endl;
613 for(
int r = 0; r < dimRange; ++ r )
615 out <<
" dofs" << r <<
"[ col ] += phi0 * fac0" << r;
616 for(
int i=1; i<simdW; ++i )
618 out <<
" + phi" << i <<
" * fac" << i << r ;
620 out <<
" ;" << std::endl;
622 out <<
" }" << std::endl;
626 const int dim,
const int dimRange,
const size_t numRows,
const size_t numCols )
628 const std::string funcName =
633 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
634 out <<
"struct AxpyRanges<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
635 out <<
"{" << std::endl;
638 out <<
" template< class QuadratureType,"<< std::endl;
639 out <<
" class RangeVectorType," << std::endl;
640 out <<
" class RangeFactorType," << std::endl;
641 out <<
" class LocalDofVectorType>" << std::endl;
642 out <<
" static void axpy( const QuadratureType& quad," << std::endl;
643 out <<
" const RangeVectorType& rangeStorage," << std::endl;
644 out <<
" const RangeFactorType& rangeFactors," << std::endl;
645 out <<
" LocalDofVectorType& dofs)" << std::endl;
646 out <<
" {" << std::endl;
655 out <<
" typedef " <<
doubletype() <<
" Field;" << std::endl;
656 out <<
" static thread_local std::vector< Field > memory( " << numCols * dimRange <<
" );" << std::endl;
660 out <<
" " <<
doubletype() <<
"* dofResult = memory.data();" << std::endl;
661 out <<
" for( int i=0; i < " << numCols * dimRange <<
"; ++i ) dofResult[ i ] = 0;" << std::endl << std::endl;
663 out <<
" " <<
doubletype() <<
"* dofs0 = dofResult;" << std::endl;
664 for(
int r = 1; r < dimRange; ++ r )
665 out <<
" " <<
doubletype() <<
"* dofs" << r <<
" = dofResult + " << r * numCols <<
";" << std::endl;
668 const size_t simdRows = simdWidth * (numRows / simdWidth) ;
672 out <<
" for( int row = 0; row < "<< simdRows <<
" ; row += " << int(simdWidth) <<
" )" << std::endl;
673 out <<
" {" << std::endl;
674 for(
int i=0; i<simdWidth; ++ i )
675 out <<
" const " <<
doubletype() <<
"* rangeFactor" << i <<
" = &rangeFactors[ row + " << i <<
" ][ 0 ];" << std::endl;
676 out <<
" " << funcName <<
"(";
677 for(
int i = 0; i < simdWidth; ++i )
681 out <<
" &rangeStorage[ quad.cachingPoint( row + " << i <<
" ) * " << numCols <<
" ][ 0 ]," << std::endl;
683 out <<
" rangeFactor0, ";
684 for(
int i=1; i<simdWidth; ++ i )
685 out <<
"rangeFactor" << i <<
",";
688 for(
int r = 0; r < dimRange; ++ r )
690 out <<
" dofs" << r ;
691 if( r == dimRange-1 )
692 out <<
" );" << std::endl;
697 out <<
" }" << std::endl;
701 if( numRows > simdRows )
703 out <<
" // remainder iteration" << std::endl;
704 out <<
" for( int row = " << simdRows <<
" ; row < " << numRows <<
" ; ++row )" << std::endl;
705 out <<
" {" << std::endl;
706 out <<
" const " <<
doubletype() <<
"* rangeFactor0 = &rangeFactors[ row ][ 0 ];" << std::endl;
708 out <<
" }" << std::endl;
712 out <<
" // sum up results (transform from variable based to point based layout)" << std::endl;
713 out <<
" for( int col = 0, dof = 0 ; col < "<< numCols <<
" ; ++col )" << std::endl;
714 out <<
" {" << std::endl;
715 for(
int r = 0 ; r < dimRange; ++ r )
716 out <<
" dofs[ dof++ ] += dofs" << r <<
"[ col ];" << std::endl;
717 out <<
" }" << std::endl;
719 out <<
" }" << std::endl << std::endl;
726 out <<
" static void " << funcName <<
"(" << std::endl;
728 for(
int i=1; i<simdWidth; ++ i )
730 for(
int i=0; i<simdWidth; ++ i )
732 for(
int r = 0; r < dimRange; ++r )
735 if( r == dimRange-1 )
736 out <<
" )" << std::endl;
738 out <<
"," << std::endl;
741 out <<
" {" << std::endl;
743 out <<
" }" << std::endl;
744 out <<
"};" << std::endl;
749 const int dimRange,
const size_t numRows,
const size_t numCols )
751 out <<
" for(int row = 0; row < " << numRows <<
" ; ++row )" << std::endl;
752 out <<
" {" << std::endl;
777 for(
int i = 0 ; i< simdW ; ++ i )
780 for(
int d = 0 ; d < dim; ++ d )
781 out <<
" const " <<
doubletype() <<
" phi" << i << d <<
" = base" << i <<
"[ row * " << dim <<
" + " << d <<
" ];" << std::endl;
787 out <<
" const int idx = row * " << dim<<
";" << std::endl;
788 for(
int d = 0; d < dim ; ++ d )
790 for(
int i = 0 ; i< simdW ; ++ i )
791 out <<
" const " <<
doubletype() <<
" phi" << i << d <<
" = base" << i <<
"[ idx + " << d <<
" ];" << std::endl;
794 for(
int d = 0; d < dim ; ++ d )
796 for(
int r = 0; r < dimRange; ++ r )
798 out <<
" result" << r << d <<
"[ row ] += phi0" << d <<
" * dof0" << r;
799 for(
int i=1; i<simdW; ++i )
800 out <<
" + phi" << i << d <<
" * dof" << i << r;
801 out <<
" ;" << std::endl;
804 out <<
" }" << std::endl;
808 const int dim,
const int dimRange,
const size_t numRows,
const size_t numCols )
810 const std::string funcName =
815 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
816 out <<
"struct EvaluateJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
817 out <<
"{" << std::endl;
818 out <<
" template< class QuadratureType,"<< std::endl;
819 out <<
" class JacobianRangeVectorType," << std::endl;
820 out <<
" class LocalDofVectorType," << std::endl;
821 out <<
" class JacobianRangeFactorType>" << std::endl;
822 out <<
" static void eval( const QuadratureType&," << std::endl;
823 out <<
" const EmptyGeometry&," << std::endl;
824 out <<
" const JacobianRangeVectorType&," << std::endl;
825 out <<
" const LocalDofVectorType&," << std::endl;
826 out <<
" JacobianRangeFactorType &)" << std::endl;
827 out <<
" {" << std::endl;
828 out <<
" std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
829 out <<
" abort();" << std::endl;
830 out <<
" }" << std::endl;
831 out <<
"};" << std::endl << std::endl;
832 out <<
"template <class BaseFunctionSet, class Geometry> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
833 out <<
"struct EvaluateJacobians<BaseFunctionSet, Geometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
834 out <<
"{" << std::endl;
835 out <<
" template< class QuadratureType,"<< std::endl;
836 out <<
" class JacobianRangeVectorType," << std::endl;
837 out <<
" class LocalDofVectorType," << std::endl;
838 out <<
" class JacobianRangeFactorType>" << std::endl;
839 out <<
" static void eval( const QuadratureType& quad," << std::endl;
840 out <<
" const Geometry& geometry," << std::endl;
841 out <<
" const JacobianRangeVectorType& jacStorage," << std::endl;
842 out <<
" const LocalDofVectorType& dofs," << std::endl;
843 out <<
" JacobianRangeFactorType& jacFactors)" << std::endl;
844 out <<
" {" << std::endl;
845 out <<
" evalJac( quad, geometry, jacStorage, dofs, jacFactors, jacFactors[ 0 ] );" << std::endl;
846 out <<
" }" << std::endl;
847 out <<
"private:" << std::endl;
848 out <<
" template< class QuadratureType,"<< std::endl;
849 out <<
" class JacobianRangeVectorType," << std::endl;
850 out <<
" class LocalDofVectorType," << std::endl;
851 out <<
" class JacobianRangeFactorType," << std::endl;
852 out <<
" class GlobalJacobianRangeType>" << std::endl;
853 out <<
" static void evalJac( const QuadratureType& quad," << std::endl;
854 out <<
" const Geometry& geometry," << std::endl;
855 out <<
" const JacobianRangeVectorType& jacStorage," << std::endl;
856 out <<
" const LocalDofVectorType& dofs," << std::endl;
857 out <<
" JacobianRangeFactorType& jacVector," << std::endl;
858 out <<
" const GlobalJacobianRangeType&)" << std::endl;
859 out <<
" {" << std::endl;
863 out <<
" typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
865 const size_t nDofs = numRows * dimRange * dim ;
866 out <<
" typedef " <<
doubletype() <<
" Field;" << std::endl;
867 out <<
" static thread_local std::vector< Field > memory( " << nDofs <<
" );" << std::endl;
870 for(
int d = 0; d < dim ; ++ d )
874 out <<
" Field* resultTmp" << d <<
" = memory.data() + " << d * numRows * dimRange <<
";" << std::endl;
876 out <<
" for( int i=0; i<" << numRows * dimRange <<
"; ++i ) " << std::endl;
877 out <<
" {" << std::endl;
878 for(
int d = 0; d < dim ; ++ d )
879 out <<
" resultTmp" << d <<
"[ i ] = 0;" << std::endl;
880 out <<
" }" << std::endl << std::endl;
882 for(
int d = 0; d < dim ; ++ d )
884 for(
int r=0; r<dimRange ; ++r )
886 out <<
" Field* result" << r << d <<
" = resultTmp" << d <<
" + " << r * numRows <<
" ;" << std::endl;
891 const size_t simdNumCols = simdWidth * ( numCols / simdWidth );
892 out <<
" typedef typename GlobalJacobianRangeType :: row_type JacobianRangeType;" << std::endl;
893 out <<
" const " <<
doubletype() <<
"* jacobians = jacStorage.data();" << std::endl;
894 if( simdNumCols > 0 )
909 out <<
" for( int col = 0, dof = 0 ; col < "<< simdNumCols <<
" ; col += " << simdWidth <<
", dof += " << simdWidth * dimRange<<
" )"<<std::endl;
910 out <<
" {" << std::endl;
943 for(
int i=0; i< simdWidth; ++ i )
945 out <<
" const "<<
doubletype() <<
"* base" << i <<
" = jacobians + (" << dim * numRows <<
" * (col + "<< i <<
"));" << std::endl;
948 out <<
" " << funcName <<
"(";
949 for(
int i = 0; i< simdWidth * dimRange; ++i )
950 out <<
" dofs[ dof + " << i <<
" ],";
951 out << std::endl <<
" ";
952 for(
int i=0; i< simdWidth; ++i )
953 out <<
"base" << i <<
", ";
954 out << std::endl <<
" ";
955 for(
int d = 0; d < dim; ++ d )
957 for(
int r = 0; r < dimRange; ++r)
959 out <<
"result" << r << d;
960 if( (r == dimRange - 1) && (d == dim-1 ) )
out << std::endl;
964 out <<
" );" << std::endl;
965 out <<
" }"<< std::endl;
970 if( numCols > simdNumCols )
972 out <<
" // remainder iteration" << std::endl;
973 out <<
" for( int col = " << simdNumCols <<
", dof = " << simdNumCols * dimRange <<
" ; col < " << numCols <<
" ; ++col )" << std::endl;
974 out <<
" {" << std::endl;
975 out <<
" const "<<
doubletype() <<
"* base0" <<
" = jacobians + (" << dim * numRows <<
" * col);" << std::endl;
976 for(
int r=0; r<dimRange; ++r )
977 out <<
" const " <<
doubletype() <<
" dof0" << r <<
" = dofs[ dof++ ];" << std::endl;
979 out <<
" }" << std::endl;
983 out <<
" // store result" << std::endl;
984 out <<
" JacobianRangeType tmp;" << std::endl;
985 out <<
" for(int row = 0; row < " << numRows <<
" ; ++row )" << std::endl;
986 out <<
" {" << std::endl;
987 out <<
" const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
988 out <<
" GlobalJacobianRangeType& result = jacVector[ row ];" << std::endl;
989 for(
int r = 0 ; r < dimRange; ++ r )
991 for(
int d = 0 ; d < dim; ++ d )
993 out <<
" tmp[ " << d <<
" ] = result" << r << d <<
"[ row ];" << std::endl;
1000 out <<
" gjit.mv( tmp, result[ "<< r <<
" ] );" << std::endl;
1002 out <<
" }" << std::endl;
1003 out <<
" }" << std::endl << std::endl;
1007 out <<
" static void " << funcName <<
"(" << std::endl;
1008 for(
int i=0; i<simdWidth; ++i )
1011 for(
int r=0; r<dimRange; ++ r )
1015 for(
int i=0; i<simdWidth; ++ i )
1017 for(
int d=0; d<dim; ++ d )
1019 for(
int r=0; r<dimRange; ++ r )
1022 if( (r == dimRange - 1) && (d == dim-1 ) )
out <<
" )" << std::endl;
1023 else out <<
"," << std::endl;
1028 out <<
" {" << std::endl;
1030 out <<
" }" << std::endl;
1031 out <<
"};" << std::endl;
1037 out <<
" for( int col = 0; col < " << numCols <<
" ; ++col )" << std::endl;
1038 out <<
" {" << std::endl;
1039 for(
int d =0; d < dim; ++d )
1040 out <<
" const " <<
doubletype() <<
" phi" << d <<
" = base[ (col * " << dim <<
") + " << d <<
" ];" << std::endl;
1042 for(
int r = 0; r < dimRange; ++r )
1044 out <<
" result" << r <<
"[ col ] += phi0 * jacFactorInv0" << r;
1045 for(
int d=1; d < dim; ++d )
1046 out <<
" + phi" << d <<
" * jacFactorInv" << d << r;
1047 out <<
";" << std::endl;
1049 out <<
" }" << std::endl;
1053 const int dim,
const int dimRange,
const size_t numRows,
const size_t numCols )
1055 const std::string funcName =
1060 out <<
"template <class BaseFunctionSet> // dimRange = "<< dimRange <<
", quadNop = " << numRows <<
", scalarBasis = " << numCols << std::endl;
1061 out <<
"struct AxpyJacobians<BaseFunctionSet, EmptyGeometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
1062 out <<
"{" << std::endl;
1063 out <<
" template< class QuadratureType,"<< std::endl;
1064 out <<
" class JacobianRangeVectorType," << std::endl;
1065 out <<
" class JacobianRangeFactorType," << std::endl;
1066 out <<
" class LocalDofVectorType>" << std::endl;
1067 out <<
" static void axpy( const QuadratureType&," << std::endl;
1068 out <<
" const EmptyGeometry&," << std::endl;
1069 out <<
" const JacobianRangeVectorType&," << std::endl;
1070 out <<
" const JacobianRangeFactorType&," << std::endl;
1071 out <<
" LocalDofVectorType&)" << std::endl;
1072 out <<
" {" << std::endl;
1073 out <<
" std::cerr << \"ERROR: wrong code generated for VectorialBaseFunctionSet::axpyJacobians\" << std::endl;" << std::endl;
1074 out <<
" abort();" << std::endl;
1075 out <<
" }" << std::endl;
1076 out <<
"};" << std::endl << std::endl;
1077 out <<
"template <class BaseFunctionSet, class Geometry>" << std::endl;
1078 out <<
"struct AxpyJacobians<BaseFunctionSet, Geometry, " << dimRange <<
", " << numRows <<
", " << numCols <<
">" << std::endl;
1079 out <<
"{" << std::endl;
1081 out <<
" template< class QuadratureType,"<< std::endl;
1082 out <<
" class JacobianRangeVectorType," << std::endl;
1083 out <<
" class JacobianRangeFactorType," << std::endl;
1084 out <<
" class LocalDofVectorType>" << std::endl;
1085 out <<
" static void axpy( const QuadratureType& quad," << std::endl;
1086 out <<
" const Geometry& geometry," << std::endl;
1087 out <<
" const JacobianRangeVectorType& jacStorage," << std::endl;
1088 out <<
" const JacobianRangeFactorType& jacFactors," << std::endl;
1089 out <<
" LocalDofVectorType& dofs)" << std::endl;
1090 out <<
" {" << std::endl;
1091 out <<
" typedef typename JacobianRangeFactorType :: value_type JacobianRangeType;" << std::endl << std::endl;
1093 const size_t dofs = dimRange * numCols ;
1094 out <<
" typedef " <<
doubletype() <<
" Field;" << std::endl;
1095 out <<
" static thread_local std::vector< Field > memory( " <<
dofs <<
" );" << std::endl;
1099 out <<
" Field* result = memory.data();" << std::endl;
1100 out <<
" for( int i = 0 ; i < " <<
dofs <<
"; ++i ) result[ i ] = 0;" << std::endl << std::endl;
1102 for(
int r=0; r<dimRange; ++r )
1103 out <<
" Field* result" << r <<
" = result + " << r * numCols <<
";" << std::endl;
1111 out <<
" const Field* base = jacStorage.data();" << std::endl << std::endl;
1112 out <<
" JacobianRangeType jacFactorTmp;" << std::endl;
1113 out <<
" for( int row = 0; row < " << numRows <<
" ; ++ row )" << std::endl;
1114 out <<
" {" << std::endl;
1115 out <<
" typedef typename Geometry::JacobianInverseTransposed GeometryJacobianType;" << std::endl;
1116 out <<
" // use reference to GeometryJacobianType to make code compile with SPGrid Geometry" << std::endl;
1117 out <<
" const GeometryJacobianType& gjit = geometry.jacobianInverseTransposed( quad.point( row ) );" << std::endl << std::endl;
1118 out <<
" for( int r = 0; r < " << dimRange <<
" ; ++r )" << std::endl;
1119 out <<
" {"<<std::endl;
1120 out <<
" gjit.mtv( jacFactors[ row ][ r ], jacFactorTmp[ r ] );" << std::endl;
1121 out <<
" }" << std::endl << std::endl;
1132 out <<
" // calculate updates" << std::endl;
1133 out <<
" " << funcName <<
"(";
1135 out <<
" base + ( quad.localCachingPoint( row ) * " << numCols * dim <<
" )," << std::endl;
1136 for(
int i =0; i < dim; ++i )
1139 for(
int r = 0; r < dimRange; ++ r )
1140 out <<
" jacFactorTmp[ " << r <<
" ][ " << i <<
" ], ";
1144 for(
int r = 0; r < dimRange; ++ r )
1146 out <<
" result" << r;
1147 if( r == dimRange -1 )
1148 out <<
" );" << std::endl ;
1152 out <<
" }" << std::endl << std::endl;
1154 out <<
" // sum up results (transform from variable based to point based layout)" << std::endl;
1155 out <<
" for( int col = 0, dof = 0 ; col < "<< numCols <<
" ; ++col )" << std::endl;
1156 out <<
" {" << std::endl;
1157 for(
int r = 0 ; r < dimRange; ++ r )
1158 out <<
" dofs[ dof++ ] += result" << r <<
"[ col ];" << std::endl;
1159 out <<
" }" << std::endl;
1161 out <<
" }" << std::endl << std::endl;
1170 out <<
" static void " << funcName <<
"(" << std::endl;
1174 for(
int i=0; i<dim; ++i )
1177 for(
int r=0; r<dimRange; ++ r )
1178 out <<
"const " <<
doubletype() <<
" jacFactorInv"<< i << r <<
", ";
1181 for(
int r = 0; r < dimRange; ++r )
1184 if( r == dimRange-1 )
1185 out <<
" )" << std::endl;
1187 out <<
"," << std::endl;
1190 out <<
" {" << std::endl;
1192 out <<
" }" << std::endl;
1193 out <<
"};" << std::endl;
1200#ifndef FEM_CODEGENERATOR_REPLACEMENT
1208 std::stringstream name;
1209 name <<
"autogeneratedcode_" << dim <<
"d_k" << polynomialOrder <<
".hh";
1216 template <
class DiscreteFunctionSpace,
class Vector>
1218 const Vector& elemQuadOrders,
1219 const Vector& faceQuadOrders,
1220 const std::string& outpath =
"./",
1221 const std::string& filename =
"autogeneratedcode.hh" )
1223 using namespace Codegen;
1225 const int dimRange = DiscreteFunctionSpace :: dimRange;
1226 const int dimDomain = DiscreteFunctionSpace :: dimDomain;
1227 const int dimGrad = dimRange*dimDomain;
1229 typedef typename DiscreteFunctionSpace :: GridPartType GridPartType;
1235 std::set< int > sizes;
1236 std::set< int > quadNops;
1237 for(
const auto& entity : space )
1240 const int scalarSize = space.basisFunctionSet( entity ).size() / dimRange ;
1241 sizes.insert( scalarSize );
1243 const auto iend = space.gridPart().iend( entity );
1244 for(
auto it = space.gridPart().ibegin( entity ); it != iend; ++it )
1246 for(
const auto& quadOrd : faceQuadOrders )
1248 FaceQuadratureType quad( space.gridPart(), *it, quadOrd, FaceQuadratureType::INSIDE );
1249 quadNops.insert( quad.nop() );
1254 for(
const auto& type : space.indexSet().types(0))
1256 for(
const auto& quadOrd : elemQuadOrders )
1258 ElementQuadratureType quad( type, quadOrd );
1259 quadNops.insert( quad.nop() );
1263 std::string
path ( outpath );
1264 path +=
"/autogeneratedcode";
1267 CodegenInfo&
info = CodegenInfo::instance();
1271 info.addDimRange( &space, dimRange );
1273 info.addDimRange( &gradSpace, dimGrad );
1275 for(
const auto& size : sizes )
1277 for(
const auto& quadNop : quadNops )
1279 info.addEntry(
"evalranges",
1280 CodeGeneratorType :: evaluateCodegen, dimDomain, dimRange, quadNop, size );
1281 info.addEntry(
"evaljacobians",
1282 CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimRange, quadNop, size );
1283 info.addEntry(
"axpyranges",
1284 CodeGeneratorType :: axpyCodegen, dimDomain, dimRange, quadNop, size );
1285 info.addEntry(
"axpyjacobians",
1286 CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop, size );
1288 info.addEntry(
"evalranges",
1289 CodeGeneratorType :: evaluateCodegen, dimDomain, dimGrad, quadNop, size );
1290 info.addEntry(
"evaljacobians",
1291 CodeGeneratorType :: evaluateJacobiansCodegen, dimDomain, dimGrad, quadNop, size );
1292 info.addEntry(
"axpyranges",
1293 CodeGeneratorType :: axpyCodegen, dimDomain, dimGrad, quadNop, size );
1294 info.addEntry(
"axpyjacobians",
1295 CodeGeneratorType :: axpyJacobianCodegen, dimDomain, dimRange, quadNop, size );
1302 info.setFileName( filename );
1303 bool written =
info.dumpInfo();
1308 std::cout <<
"Written code to " << filename << std::endl;
1313 std::ofstream file( outpath +
"/" + filename );
1317 std::string header( filename );
1318 size_t size = header.size();
1320 for(
size_t i=0; i<size; ++i )
1322 if( header[ i ] ==
'.' )
1326 file <<
"#ifndef " << header <<
"_INCLUDED" << std::endl;
1327 file <<
"#define " << header <<
"_INCLUDED" << std::endl;
1328 file <<
"#ifndef USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1329 file <<
"#define USE_BASEFUNCTIONSET_CODEGEN" << std::endl;
1330 file <<
"#endif" << std::endl;
1331 file <<
"// this is the file containing the necessary includes for the specialized codes" << std::endl;
1332 file <<
"#define DUNE_FEM_INCLUDE_AUTOGENERATEDCODE_FILENAME_SPEC \"" <<
path <<
"/" << filename <<
"\"" << std::endl;
1333 file <<
"#endif" << std::endl;
1339 std::cout <<
"No changes written to " << filename << std::endl << std::endl;
std::string path
Definition: readioparams.cc:156
double max(const Dune::Fem::Double &v, const double p)
Definition: double.hh:965
double min(const Dune::Fem::Double &v, const double p)
Definition: double.hh:953
Definition: bindguard.hh:11
bool createDirectory(const std::string &inName)
create a directory
Definition: io.cc:19
IteratorRange< typename DF::DofIteratorType > dofs(DF &df)
Iterates over all DOFs.
Definition: rangegenerators.hh:76
void generateCode(const DiscreteFunctionSpace &space, const Vector &elemQuadOrders, const Vector &faceQuadOrders, const std::string &outpath="./", const std::string &filename="autogeneratedcode.hh")
Definition: codegen.hh:1217
std::string autoFilename(const int dim, const int polynomialOrder)
Definition: codegen.hh:1206
DefaultCodeGenerator< 4 > CodeGeneratorType
Definition: codegen.hh:1201
Container for User Specified Parameters.
Definition: io/parameter.hh:191
Definition: grcommon.hh:31
Definition: codegen.hh:29
Definition: codegen.hh:32
void setPath(const std::string &path)
overwrite path
Definition: codegen.hh:96
CodegenInfo()
Definition: codegen.hh:67
void finalize() const
Definition: codegen.hh:207
void setFileName(const std::string &filename)
overwrite filename
Definition: codegen.hh:99
void addEntry(const std::string &fileprefix, codegenfunc_t *codegenfunc, const int dim, const int dimRange, const int quadNop, const int numBase)
Definition: codegen.hh:116
void addDimRange(const BaseSet *baseSet, const int dimRange)
Definition: codegen.hh:102
static CodegenInfo & instance()
Definition: codegen.hh:77
void clear()
clear all registered entries
Definition: codegen.hh:83
bool dumpInfo(const bool writeToCurrentDir=false) const
Definition: codegen.hh:218
int exists(const std::string &filename) const
Definition: codegen.hh:328
bool checkAbort() const
Definition: codegen.hh:340
default code generator methods
Definition: codegen.hh:368
static std::string generateFunctionName(const std::string &prefix, const int simdW, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:415
static void writeInnerLoopEval(std::ostream &out, const int simdW, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:424
static void writeInnerLoop(std::ostream &out, const int simdW, const int dimRange, const size_t numCols)
Definition: codegen.hh:590
static void evaluateJacobiansCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:807
static void writePreCompHeader(std::ostream &out, const int stage)
Definition: codegen.hh:383
static void axpyCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:625
static void writeInnerJacEvalLoop(std::ostream &out, const int simdW, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:748
static const char * doubletype()
Definition: codegen.hh:374
static void evaluateCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:452
static const char * restrictKey()
Definition: codegen.hh:369
static void axpyJacobianCodegen(std::ostream &out, const int dim, const int dimRange, const size_t numRows, const size_t numCols)
Definition: codegen.hh:1052
static void writeInnerLoopAxpyJac(std::ostream &out, const int dim, const int dimRange, const size_t numCols)
Definition: codegen.hh:1035
return singleton instance of given Object type.
Definition: singleton.hh:88