Chaste Release::3.1
|
00001 /* 00002 00003 Copyright (c) 2005-2012, University of Oxford. 00004 All rights reserved. 00005 00006 University of Oxford means the Chancellor, Masters and Scholars of the 00007 University of Oxford, having an administrative office at Wellington 00008 Square, Oxford OX1 2JD, UK. 00009 00010 This file is part of Chaste. 00011 00012 Redistribution and use in source and binary forms, with or without 00013 modification, are permitted provided that the following conditions are met: 00014 * Redistributions of source code must retain the above copyright notice, 00015 this list of conditions and the following disclaimer. 00016 * Redistributions in binary form must reproduce the above copyright notice, 00017 this list of conditions and the following disclaimer in the documentation 00018 and/or other materials provided with the distribution. 00019 * Neither the name of the University of Oxford nor the names of its 00020 contributors may be used to endorse or promote products derived from this 00021 software without specific prior written permission. 00022 00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00024 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00025 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00026 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 00027 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00028 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 00029 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00030 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00031 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 00032 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 00034 */ 00035 00036 #include <iostream> 00037 00038 #include "PetscVecTools.hpp" // Includes Ublas so must come first 00039 #include "PCLDUFactorisation.hpp" 00040 #include "Exception.hpp" 00041 00042 PCLDUFactorisation::PCLDUFactorisation(KSP& rKspObject) 00043 { 00044 #ifdef TRACE_KSP 00045 mPCContext.mScatterTime = 0.0; 00046 mPCContext.mA1PreconditionerTime = 0.0;; 00047 mPCContext.mA2PreconditionerTime = 0.0;; 00048 mPCContext.mGatherTime = 0.0;; 00049 #endif 00050 00051 PCLDUFactorisationCreate(rKspObject); 00052 PCLDUFactorisationSetUp(); 00053 } 00054 00055 PCLDUFactorisation::~PCLDUFactorisation() 00056 { 00057 #ifdef TRACE_KSP 00058 if (PetscTools::AmMaster()) 00059 { 00060 std::cout << " -- LDU factorisation preconditioner profile information: " << std::endl; 00061 std::cout << "\t mScatterTime: " << mPCContext.mScatterTime << std::endl; 00062 std::cout << "\t mA1PreconditionerTime: " << mPCContext.mA1PreconditionerTime << std::endl; 00063 std::cout << "\t mA2PreconditionerTime: " << mPCContext.mA2PreconditionerTime << std::endl; 00064 std::cout << "\t mExtraLAOperations: " << mPCContext.mExtraLAOperations << std::endl; 00065 std::cout << "\t mGatherTime: " << mPCContext.mGatherTime << std::endl; 00066 } 00067 #endif 00068 00069 PetscTools::Destroy(mPCContext.A11_matrix_subblock); 00070 PetscTools::Destroy(mPCContext.A22_matrix_subblock); 00071 PetscTools::Destroy(mPCContext.B_matrix_subblock); 00072 00073 PCDestroy(PETSC_DESTROY_PARAM(mPCContext.PC_amg_A11)); 00074 PCDestroy(PETSC_DESTROY_PARAM(mPCContext.PC_amg_A22)); 00075 00076 PetscTools::Destroy(mPCContext.x1_subvector); 00077 PetscTools::Destroy(mPCContext.y1_subvector); 00078 PetscTools::Destroy(mPCContext.x2_subvector); 00079 PetscTools::Destroy(mPCContext.y2_subvector); 00080 PetscTools::Destroy(mPCContext.z); 00081 PetscTools::Destroy(mPCContext.temp); 00082 00083 VecScatterDestroy(PETSC_DESTROY_PARAM(mPCContext.A11_scatter_ctx)); 00084 VecScatterDestroy(PETSC_DESTROY_PARAM(mPCContext.A22_scatter_ctx)); 00085 } 00086 00087 void PCLDUFactorisation::PCLDUFactorisationCreate(KSP& rKspObject) 00088 { 00089 KSPGetPC(rKspObject, &mPetscPCObject); 00090 00091 Mat system_matrix, dummy; 00092 MatStructure flag; 00093 KSPGetOperators(rKspObject, &system_matrix, &dummy, &flag); 00094 00095 PetscInt num_rows, num_columns; 00096 MatGetSize(system_matrix, &num_rows, &num_columns); 00097 00098 PetscInt num_local_rows, num_local_columns; 00099 MatGetLocalSize(system_matrix, &num_local_rows, &num_local_columns); 00100 00101 // Odd number of rows: impossible in Bidomain. 00102 // Odd number of local rows: impossible if V_m and phi_e for each node are stored in the same processor. 00103 if ((num_rows%2 != 0) || (num_local_rows%2 != 0)) 00104 { 00105 TERMINATE("Wrong matrix parallel layout detected in PCLDUFactorisation."); 00106 } 00107 00108 // Allocate memory 00109 unsigned subvector_num_rows = num_rows/2; 00110 unsigned subvector_local_rows = num_local_rows/2; 00111 mPCContext.x1_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows); 00112 mPCContext.x2_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows); 00113 mPCContext.y1_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows); 00114 mPCContext.y2_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows); 00115 mPCContext.z = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows); 00116 mPCContext.temp = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows); 00117 00118 // Create scatter contexts 00119 { 00120 // Needed by VecScatterCreate in order to find out parallel layout. 00121 Vec dummy_vec = PetscTools::CreateVec(num_rows, num_local_rows); 00122 00123 PetscVecTools::SetupInterleavedVectorScatterGather(dummy_vec, mPCContext.A11_scatter_ctx, mPCContext.A22_scatter_ctx); 00124 00125 PetscTools::Destroy(dummy_vec); 00126 } 00127 00128 // Get matrix sublock A11 00129 { 00130 // Work out local row range for subblock A11 (same as x1 or y1) 00131 PetscInt low, high, global_size; 00132 VecGetOwnershipRange(mPCContext.x1_subvector, &low, &high); 00133 VecGetSize(mPCContext.x1_subvector, &global_size); 00134 assert(global_size == num_rows/2); 00135 00136 IS A11_local_rows; 00137 IS A11_columns; 00138 ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low, 2, &A11_local_rows); 00139 ISCreateStride(PETSC_COMM_WORLD, global_size, 0, 2, &A11_columns); 00140 00141 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later 00142 MatGetSubMatrix(system_matrix, A11_local_rows, A11_local_rows, 00143 MAT_INITIAL_MATRIX, &mPCContext.A11_matrix_subblock); 00144 #else 00145 MatGetSubMatrix(system_matrix, A11_local_rows, A11_columns, PETSC_DECIDE, 00146 MAT_INITIAL_MATRIX, &mPCContext.A11_matrix_subblock); 00147 #endif 00148 ISDestroy(PETSC_DESTROY_PARAM(A11_local_rows)); 00149 ISDestroy(PETSC_DESTROY_PARAM(A11_columns)); 00150 } 00151 00152 // Get matrix sublock A22 00153 { 00154 // Work out local row range for subblock A22 (same as x2 or y2) 00155 PetscInt low, high, global_size; 00156 VecGetOwnershipRange(mPCContext.x2_subvector, &low, &high); 00157 VecGetSize(mPCContext.x2_subvector, &global_size); 00158 assert(global_size == num_rows/2); 00159 00160 IS A22_local_rows; 00161 IS A22_columns; 00162 ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low+1, 2, &A22_local_rows); 00163 ISCreateStride(PETSC_COMM_WORLD, global_size, 1, 2, &A22_columns); 00164 00165 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later 00166 MatGetSubMatrix(system_matrix, A22_local_rows, A22_local_rows, 00167 MAT_INITIAL_MATRIX, &mPCContext.A22_matrix_subblock); 00168 #else 00169 MatGetSubMatrix(system_matrix, A22_local_rows, A22_columns, PETSC_DECIDE, 00170 MAT_INITIAL_MATRIX, &mPCContext.A22_matrix_subblock); 00171 #endif 00172 00173 ISDestroy(PETSC_DESTROY_PARAM(A22_local_rows)); 00174 ISDestroy(PETSC_DESTROY_PARAM(A22_columns)); 00175 } 00176 00177 // Get matrix sublock B (the upper triangular one) 00178 { 00179 // Work out local row range for subblock B (same as A11, x1 or y1) 00180 PetscInt low, high, global_size; 00181 VecGetOwnershipRange(mPCContext.x1_subvector, &low, &high); 00182 VecGetSize(mPCContext.x1_subvector, &global_size); 00183 assert(global_size == num_rows/2); 00184 00185 IS B_local_rows; 00186 IS B_columns; 00187 ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low, 2, &B_local_rows); 00188 00189 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later 00190 ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low+1, 2, &B_columns); 00191 MatGetSubMatrix(system_matrix, B_local_rows, B_columns, 00192 MAT_INITIAL_MATRIX, &mPCContext.B_matrix_subblock); 00193 #else 00194 ISCreateStride(PETSC_COMM_WORLD, global_size, 1, 2, &B_columns); 00195 MatGetSubMatrix(system_matrix, B_local_rows, B_columns, PETSC_DECIDE, 00196 MAT_INITIAL_MATRIX, &mPCContext.B_matrix_subblock); 00197 #endif 00198 00199 ISDestroy(PETSC_DESTROY_PARAM(B_local_rows)); 00200 ISDestroy(PETSC_DESTROY_PARAM(B_columns)); 00201 } 00202 00203 /* 00204 * Experimental (#1082): in PP removing the mass matrix from the A22 block seems to work better. 00205 * This is equivalent to do A22 = A22 + B in this implementation. 00206 */ 00207 //#if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2 00208 // PetscScalar petsc_one = 1.0; 00209 // MatAXPY(&petsc_one, mPCContext.B_matrix_subblock, mPCContext.A22_matrix_subblock, DIFFERENT_NONZERO_PATTERN); 00210 // #else 00211 // MatAXPY(mPCContext.A22_matrix_subblock, 1.0, mPCContext.B_matrix_subblock, DIFFERENT_NONZERO_PATTERN); 00212 //#endif 00213 00214 // // Shift the block 00215 //#if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2 00216 // PetscScalar shift = -1e-8; 00217 // MatShift(&shift, mPCContext.A22_matrix_subblock); 00218 // #else 00219 // PetscScalar shift = -1e-8; 00220 // MatShift(mPCContext.A22_matrix_subblock, shift); 00221 //#endif 00222 00223 PCSetType(mPetscPCObject, PCSHELL); 00224 #if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2 00225 // Register PC context and call-back function 00226 PCShellSetApply(mPetscPCObject, PCLDUFactorisationApply, (void*) &mPCContext); 00227 #else 00228 // Register PC context so it gets passed to PCBlockDiagonalApply 00229 PCShellSetContext(mPetscPCObject, &mPCContext); 00230 // Register call-back function 00231 PCShellSetApply(mPetscPCObject, PCLDUFactorisationApply); 00232 #endif 00233 } 00234 00235 void PCLDUFactorisation::PCLDUFactorisationSetUp() 00236 { 00237 // These options will get read by PCSetFromOptions 00238 // PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter", "1"); 00239 // PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); 00240 // PetscOptionsSetValue("-pc_hypre_type", "boomeramg"); 00241 00242 /* 00243 * Set up preconditioner for block A11 00244 */ 00245 PCCreate(PETSC_COMM_WORLD, &(mPCContext.PC_amg_A11)); 00246 PCSetOperators(mPCContext.PC_amg_A11, mPCContext.A11_matrix_subblock, mPCContext.A11_matrix_subblock, SAME_PRECONDITIONER); 00247 00248 // Choose between the two following blocks in order to approximate inv(A11) with one AMG cycle 00249 // or with an CG solve with high tolerance 00251 // PCSetType(mPCContext.PC_amg_A11, PCBJACOBI); 00252 00253 PCSetType(mPCContext.PC_amg_A11, PCHYPRE); 00254 //PCHYPRESetType(mPCContext.PC_amg_A11, "euclid"); 00255 PetscOptionsSetValue("-pc_hypre_type", "euclid"); 00256 PetscOptionsSetValue("-pc_hypre_euclid_levels", "0"); 00257 00258 // PCSetType(mPCContext.PC_amg_A11, PCHYPRE); 00259 // PetscOptionsSetValue("-pc_hypre_type", "boomeramg"); 00260 // PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter", "1"); 00261 // PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); 00262 // PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "HMIS"); 00263 00265 // PCSetType(mPCContext.PC_amg_A11, PCKSP); 00266 // KSP ksp1; 00267 // PCKSPGetKSP(mPCContext.PC_amg_A11,&ksp1); 00268 // KSPSetType(ksp1, KSPCG); 00269 // KSPSetTolerances(ksp1, 0.1, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT); 00270 // 00271 // PC prec1; 00272 // KSPGetPC(ksp1, &prec1); 00273 // PCSetType(prec1, PCBJACOBI); 00274 // PCSetFromOptions(prec1); 00275 // PCSetOperators(prec1, mPCContext.A11_matrix_subblock, mPCContext.A11_matrix_subblock, DIFFERENT_NONZERO_PATTERN);// SAME_PRECONDITIONER); 00276 // PCSetUp(prec1); 00277 // 00278 // KSPSetFromOptions(ksp1); 00279 // KSPSetUp(ksp1); 00281 00282 PCSetFromOptions(mPCContext.PC_amg_A11); 00283 PCSetUp(mPCContext.PC_amg_A11); 00284 00285 /* 00286 * Set up amg preconditioner for block A22 00287 */ 00288 PCCreate(PETSC_COMM_WORLD, &(mPCContext.PC_amg_A22)); 00289 PCSetOperators(mPCContext.PC_amg_A22, mPCContext.A22_matrix_subblock, mPCContext.A22_matrix_subblock, SAME_PRECONDITIONER); 00290 00291 // Choose between the two following blocks in order to approximate inv(A11) with one AMG cycle 00292 // or with an CG solve with high tolerance 00294 PCSetType(mPCContext.PC_amg_A22, PCHYPRE); 00295 PetscOptionsSetValue("-pc_hypre_type", "boomeramg"); 00296 PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter", "1"); 00297 PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0"); 00298 PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "HMIS"); 00299 // PetscOptionsSetValue("-pc_hypre_boomeramg_interp_type","ext+i"); 00300 00302 // PCSetType(mPCContext.PC_amg_A22, PCKSP); 00303 // KSP ksp2; 00304 // PCKSPGetKSP(mPCContext.PC_amg_A22,&ksp2); 00305 // KSPSetType(ksp2, KSPCG); 00306 // KSPSetTolerances(ksp2, 0.1, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT); 00307 // 00308 // PC prec2; 00309 // KSPGetPC(ksp2, &prec2); 00310 // PCSetType(prec2, PCBJACOBI); 00311 // PCSetFromOptions(prec2); 00312 // PCSetOperators(prec2, mPCContext.A22_matrix_subblock, mPCContext.A22_matrix_subblock, DIFFERENT_NONZERO_PATTERN);// SAME_PRECONDITIONER); 00313 // PCSetUp(prec2); 00314 // 00315 // KSPSetFromOptions(ksp2); 00316 // KSPSetUp(ksp2); 00318 00319 PCSetFromOptions(mPCContext.PC_amg_A22); 00320 PCSetUp(mPCContext.PC_amg_A22); 00321 } 00322 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later 00323 PetscErrorCode PCLDUFactorisationApply(PC pc_object, Vec x, Vec y) 00324 { 00325 void* pc_context; 00326 00327 PCShellGetContext(pc_object, &pc_context); 00328 #else 00329 PetscErrorCode PCLDUFactorisationApply(void* pc_context, Vec x, Vec y) 00330 { 00331 #endif 00332 00333 00334 // Cast the pointer to a PC context to our defined type 00335 PCLDUFactorisation::PCLDUFactorisationContext* block_diag_context = (PCLDUFactorisation::PCLDUFactorisationContext*) pc_context; 00336 assert(block_diag_context!=NULL); 00337 00338 /* 00339 * Split vector x into two. x = [x1 x2]' 00340 */ 00341 #ifdef TRACE_KSP 00342 double init_time = MPI_Wtime(); 00343 #endif 00344 00345 PetscVecTools::DoInterleavedVecScatter(x, block_diag_context->A11_scatter_ctx, block_diag_context->x1_subvector, block_diag_context->A22_scatter_ctx, block_diag_context->x2_subvector); 00346 00347 #ifdef TRACE_KSP 00348 block_diag_context->mScatterTime += MPI_Wtime() - init_time; 00349 #endif 00350 00351 /* 00352 * Apply preconditioner: [y1 y2]' = inv(P)[x1 x2]' 00353 * 00354 * z = inv(A11)*x1 00355 * y2 = inv(A22)*(x2 - B*z) 00356 * y1 = z - inv(A11)(B*y2) 00357 */ 00358 #ifdef TRACE_KSP 00359 init_time = MPI_Wtime(); 00360 #endif 00361 //z = inv(A11)*x1 00362 PCApply(block_diag_context->PC_amg_A11, block_diag_context->x1_subvector, block_diag_context->z); 00363 #ifdef TRACE_KSP 00364 block_diag_context->mA1PreconditionerTime += MPI_Wtime() - init_time; 00365 #endif 00366 00367 //y2 = inv(A22)*(x2 - B*z) 00368 #ifdef TRACE_KSP 00369 init_time = MPI_Wtime(); 00370 #endif 00371 MatMult(block_diag_context->B_matrix_subblock,block_diag_context->z,block_diag_context->temp); //temp = B*z 00372 double minus_one = -1.0; 00373 #if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2 00374 VecAYPX(&minus_one, block_diag_context->x2_subvector, block_diag_context->temp); // temp <-- x2 - temp 00375 #else 00376 VecAYPX(block_diag_context->temp, minus_one, block_diag_context->x2_subvector); // temp <-- x2 - temp 00377 #endif 00378 #ifdef TRACE_KSP 00379 block_diag_context->mExtraLAOperations += MPI_Wtime() - init_time; 00380 #endif 00381 00382 00383 #ifdef TRACE_KSP 00384 init_time = MPI_Wtime(); 00385 #endif 00386 PCApply(block_diag_context->PC_amg_A22, block_diag_context->temp, block_diag_context->y2_subvector); // y2 = inv(A22)*temp 00387 #ifdef TRACE_KSP 00388 block_diag_context->mA2PreconditionerTime += MPI_Wtime() - init_time; 00389 #endif 00390 00391 // y1 = z - inv(A11)(B*y2) 00392 #ifdef TRACE_KSP 00393 init_time = MPI_Wtime(); 00394 #endif 00395 MatMult(block_diag_context->B_matrix_subblock,block_diag_context->y2_subvector,block_diag_context->temp); //temp = B*y2 00396 #ifdef TRACE_KSP 00397 block_diag_context->mExtraLAOperations += MPI_Wtime() - init_time; 00398 #endif 00399 #ifdef TRACE_KSP 00400 init_time = MPI_Wtime(); 00401 #endif 00402 PCApply(block_diag_context->PC_amg_A11, block_diag_context->temp, block_diag_context->y1_subvector); // y1 = inv(A11)*temp 00403 #ifdef TRACE_KSP 00404 block_diag_context->mA1PreconditionerTime += MPI_Wtime() - init_time; 00405 #endif 00406 00407 #ifdef TRACE_KSP 00408 init_time = MPI_Wtime(); 00409 #endif 00410 #if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2 00411 VecAYPX(&minus_one, block_diag_context->z, block_diag_context->y1_subvector); // y1 <-- z - y1 00412 #else 00413 VecAYPX(block_diag_context->y1_subvector, minus_one, block_diag_context->z); // y1 <-- z - y1 00414 #endif 00415 #ifdef TRACE_KSP 00416 block_diag_context->mExtraLAOperations += MPI_Wtime() - init_time; 00417 #endif 00418 00419 /* 00420 * Gather vectors y1 and y2. y = [y1 y2]' 00421 */ 00422 #ifdef TRACE_KSP 00423 init_time = MPI_Wtime(); 00424 #endif 00425 00426 PetscVecTools::DoInterleavedVecGather(y, block_diag_context->A11_scatter_ctx, block_diag_context->y1_subvector, block_diag_context->A22_scatter_ctx, block_diag_context->y2_subvector); 00427 00428 #ifdef TRACE_KSP 00429 block_diag_context->mGatherTime += MPI_Wtime() - init_time; 00430 #endif 00431 00432 return 0; 00433 }