/////////////////////////////////////////////////////////////////////////////////// // File : CG_SOLVER.cpp /////////////////////////////////////////////////////////////////////////////////// // // LumosQuad - A Lightning Generator // Copyright 2007 // The University of North Carolina at Chapel Hill // /////////////////////////////////////////////////////////////////////////////////// // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // The University of North Carolina at Chapel Hill makes no representations // about the suitability of this software for any purpose. It is provided // "as is" without express or implied warranty. // // Permission to use, copy, modify and distribute this software and its // documentation for educational, research and non-profit purposes, without // fee, and without a written agreement is hereby granted, provided that the // above copyright notice and the following three paragraphs appear in all // copies. // // THE UNIVERSITY OF NORTH CAROLINA SPECIFICALLY DISCLAIM ANY WARRANTIES, // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND // FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN // "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA HAS NO OBLIGATION TO // PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. // // Please send questions and comments about LumosQuad to kim@cs.unc.edu. // /////////////////////////////////////////////////////////////////////////////////// // // This program uses OpenEXR, which has the following restrictions: // // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas // Digital Ltd. LLC // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Industrial Light & Magic nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // #include "CG_SOLVER.h" ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// CG_SOLVER::CG_SOLVER(int maxDepth, int iterations, int digits) : _iterations(iterations), _arraySize(0), _listSize(0), _digits(digits), _direction(NULL), _residual(NULL), _q(NULL), _potential(NULL) { // compute the physical size of various grid cells _dx = new float[maxDepth + 1]; _dx[0] = 1.0f; for (int x = 1; x <= maxDepth; x++) _dx[x] = _dx[x - 1] * 0.5f; } CG_SOLVER::~CG_SOLVER() { if (_direction) delete[] _direction; if (_residual) delete[] _residual; if (_potential) delete[] _potential; if (_q) delete[] _q; } ////////////////////////////////////////////////////////////////////// // reallocate scratch arrays if necessary ////////////////////////////////////////////////////////////////////// void CG_SOLVER::reallocate() { // if we have enough size already, return if (_arraySize >= _listSize) return; // made sure it SSE aligns okay _arraySize = _listSize * 2; if (_arraySize % 4) _arraySize += 4 - _arraySize % 4; // delete the old ones if (_direction) delete[] _direction; if (_residual) delete[] _residual; if (_q) delete[] _q; // allocate the new ones _direction = new float[_arraySize]; _residual = new float[_arraySize]; _q = new float[_arraySize]; // wipe the new ones for (int x = 0; x < _arraySize; x++) _direction[x] = _residual[x] = _q[x] = 0.0f; } ////////////////////////////////////////////////////////////////////// // conjugate gradient solver ////////////////////////////////////////////////////////////////////// int CG_SOLVER::solve(list cells) { // counters int x, y, index; list::iterator cellIterator; // i = 0 int i = 0; // precalculate stencils calcStencils(cells); // reallocate scratch arrays if necessary _listSize = cells.size(); reallocate(); // compute a new lexicographical order cellIterator = cells.begin(); for (x = 0; x < _listSize; x++, cellIterator++) (*cellIterator)->index = x; // r = b - Ax calcResidual(cells); // copy residual into easy array // d = r cellIterator = cells.begin(); float deltaNew = 0.0f; for (x = 0; x < _listSize; x++, cellIterator++) { _direction[x] = _residual[x]; deltaNew += _residual[x] * _residual[x]; } // delta0 = deltaNew float delta0 = deltaNew; // While deltaNew > (eps^2) * delta0 float eps = pow(10.0f, (float)-_digits); float maxR = 2.0f * eps; while ((i < _iterations) && (maxR > eps)) { // q = Ad cellIterator = cells.begin(); for (y = 0; y < _listSize; y++, cellIterator++) { CELL* currentCell = *cellIterator; CELL** neighbors = currentCell->neighbors; float neighborSum = 0.0f; for (int x = 0; x < 4; x++) { int j = x * 2; neighborSum += _direction[neighbors[j]->index] * currentCell->stencil[j]; if (neighbors[j+1]) neighborSum += _direction[neighbors[j+1]->index] * currentCell->stencil[j+1]; } _q[y] = -neighborSum + _direction[y] * currentCell->stencil[8]; } // alpha = deltaNew / (transpose(d) * q) float alpha = 0.0f; for (x = 0; x < _listSize; x++) alpha += _direction[x] * _q[x]; if (fabs(alpha) > 0.0f) alpha = deltaNew / alpha; // x = x + alpha * d cellIterator = cells.begin(); for (x = 0; x < _listSize; x++, cellIterator++) (*cellIterator)->potential += alpha * _direction[x]; // r = r - alpha * q maxR = 0.0f; for (x = 0; x < _listSize; x++) { _residual[x] -= _q[x] * alpha; maxR = (_residual[x] > maxR) ? _residual[x] : maxR; } // deltaOld = deltaNew float deltaOld = deltaNew; // deltaNew = transpose(r) * r deltaNew = 0.0f; for (x = 0; x < _listSize; x++) deltaNew += _residual[x] * _residual[x]; // beta = deltaNew / deltaOld float beta = deltaNew / deltaOld; // d = r + beta * d for (x = 0; x < _listSize; x++) _direction[x] = _residual[x] + beta * _direction[x]; // i = i + 1 i++; } return i; } ////////////////////////////////////////////////////////////////////// // calculate the residuals ////////////////////////////////////////////////////////////////////// float CG_SOLVER::calcResidual(list cells) { float maxResidual = 0.0f; list::iterator cellIterator = cells.begin(); for (int i = 0; i < _listSize; i++, cellIterator++) { CELL* currentCell = *cellIterator; float dx = _dx[currentCell->depth]; float neighborSum = 0.0f; for (int x = 0; x < 4; x++) { int i = x * 2; neighborSum += currentCell->neighbors[i]->potential * currentCell->stencil[i]; if (currentCell->neighbors[i+1]) neighborSum += currentCell->neighbors[i+1]->potential * currentCell->stencil[i+1]; } _residual[i] = currentCell->b - (-neighborSum + currentCell->potential * currentCell->stencil[8]); if (fabs(_residual[i]) > maxResidual) maxResidual = fabs(_residual[i]); } return maxResidual; } ////////////////////////////////////////////////////////////////////// // compute stencils once and store ////////////////////////////////////////////////////////////////////// void CG_SOLVER::calcStencils(list cells) { list::iterator cellIterator = cells.begin(); for (cellIterator = cells.begin(); cellIterator != cells.end(); cellIterator++) { CELL* currentCell = *cellIterator; float invDx = 1.0f / _dx[currentCell->depth]; // sum over faces float deltaSum = 0.0f; float bSum = 0.0f; for (int x = 0; x < 4; x++) { int i = x * 2; currentCell->stencil[i] = 0.0f; currentCell->stencil[i+1] = 0.0f; if (currentCell->neighbors[i + 1] == NULL) { // if it is the same refinement level (case 1) if (currentCell->depth == currentCell->neighbors[i]->depth) { deltaSum += invDx; if (!currentCell->neighbors[i]->boundary) currentCell->stencil[i] = invDx; else bSum += (currentCell->neighbors[i]->potential) * invDx; } // else it is less refined (case 3) else { deltaSum += 0.5f * invDx; if (!currentCell->neighbors[i]->boundary) currentCell->stencil[i] = 0.5f * invDx; else bSum += currentCell->neighbors[i]->potential * 0.5f * invDx; } } // if the neighbor is at a lower level (case 2) else { deltaSum += 2.0f * invDx; if (!currentCell->neighbors[i]->boundary) currentCell->stencil[i] = invDx; else bSum += currentCell->neighbors[i]->potential * invDx; if (!currentCell->neighbors[i+1]->boundary) currentCell->stencil[i+1] = invDx; else bSum += currentCell->neighbors[i+1]->potential * invDx; } } currentCell->stencil[8] = deltaSum; currentCell->b = bSum; } }