lumosquad/CG_SOLVER.cpp
2025-07-26 22:14:37 +02:00

309 lines
10 KiB
C++

///////////////////////////////////////////////////////////////////////////////////
// File : CG_SOLVER.cpp
///////////////////////////////////////////////////////////////////////////////////
//
// LumosQuad - A Lightning Generator
// Copyright 2007
// The University of North Carolina at Chapel Hill
//
///////////////////////////////////////////////////////////////////////////////////
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// The University of North Carolina at Chapel Hill makes no representations
// about the suitability of this software for any purpose. It is provided
// "as is" without express or implied warranty.
//
// Permission to use, copy, modify and distribute this software and its
// documentation for educational, research and non-profit purposes, without
// fee, and without a written agreement is hereby granted, provided that the
// above copyright notice and the following three paragraphs appear in all
// copies.
//
// THE UNIVERSITY OF NORTH CAROLINA SPECIFICALLY DISCLAIM ANY WARRANTIES,
// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN
// "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA HAS NO OBLIGATION TO
// PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
//
// Please send questions and comments about LumosQuad to kim@cs.unc.edu.
//
///////////////////////////////////////////////////////////////////////////////////
//
// This program uses OpenEXR, which has the following restrictions:
//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
#include "CG_SOLVER.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CG_SOLVER::CG_SOLVER(int maxDepth, int iterations, int digits) :
_iterations(iterations),
_arraySize(0), _listSize(0), _digits(digits),
_direction(NULL), _residual(NULL), _q(NULL), _potential(NULL)
{
// compute the physical size of various grid cells
_dx = new float[maxDepth + 1];
_dx[0] = 1.0f;
for (int x = 1; x <= maxDepth; x++)
_dx[x] = _dx[x - 1] * 0.5f;
}
CG_SOLVER::~CG_SOLVER()
{
if (_direction) delete[] _direction;
if (_residual) delete[] _residual;
if (_potential) delete[] _potential;
if (_q) delete[] _q;
}
//////////////////////////////////////////////////////////////////////
// reallocate scratch arrays if necessary
//////////////////////////////////////////////////////////////////////
void CG_SOLVER::reallocate()
{
// if we have enough size already, return
if (_arraySize >= _listSize) return;
// made sure it SSE aligns okay
_arraySize = _listSize * 2;
if (_arraySize % 4)
_arraySize += 4 - _arraySize % 4;
// delete the old ones
if (_direction) delete[] _direction;
if (_residual) delete[] _residual;
if (_q) delete[] _q;
// allocate the new ones
_direction = new float[_arraySize];
_residual = new float[_arraySize];
_q = new float[_arraySize];
// wipe the new ones
for (int x = 0; x < _arraySize; x++)
_direction[x] = _residual[x] = _q[x] = 0.0f;
}
//////////////////////////////////////////////////////////////////////
// conjugate gradient solver
//////////////////////////////////////////////////////////////////////
int CG_SOLVER::solve(list<CELL*> cells)
{
// counters
int x, y, index;
list<CELL*>::iterator cellIterator;
// i = 0
int i = 0;
// precalculate stencils
calcStencils(cells);
// reallocate scratch arrays if necessary
_listSize = cells.size();
reallocate();
// compute a new lexicographical order
cellIterator = cells.begin();
for (x = 0; x < _listSize; x++, cellIterator++)
(*cellIterator)->index = x;
// r = b - Ax
calcResidual(cells);
// copy residual into easy array
// d = r
cellIterator = cells.begin();
float deltaNew = 0.0f;
for (x = 0; x < _listSize; x++, cellIterator++)
{
_direction[x] = _residual[x];
deltaNew += _residual[x] * _residual[x];
}
// delta0 = deltaNew
float delta0 = deltaNew;
// While deltaNew > (eps^2) * delta0
float eps = pow(10.0f, (float)-_digits);
float maxR = 2.0f * eps;
while ((i < _iterations) && (maxR > eps))
{
// q = Ad
cellIterator = cells.begin();
for (y = 0; y < _listSize; y++, cellIterator++)
{
CELL* currentCell = *cellIterator;
CELL** neighbors = currentCell->neighbors;
float neighborSum = 0.0f;
for (int x = 0; x < 4; x++)
{
int j = x * 2;
neighborSum += _direction[neighbors[j]->index] * currentCell->stencil[j];
if (neighbors[j+1])
neighborSum += _direction[neighbors[j+1]->index] * currentCell->stencil[j+1];
}
_q[y] = -neighborSum + _direction[y] * currentCell->stencil[8];
}
// alpha = deltaNew / (transpose(d) * q)
float alpha = 0.0f;
for (x = 0; x < _listSize; x++)
alpha += _direction[x] * _q[x];
if (fabs(alpha) > 0.0f)
alpha = deltaNew / alpha;
// x = x + alpha * d
cellIterator = cells.begin();
for (x = 0; x < _listSize; x++, cellIterator++)
(*cellIterator)->potential += alpha * _direction[x];
// r = r - alpha * q
maxR = 0.0f;
for (x = 0; x < _listSize; x++)
{
_residual[x] -= _q[x] * alpha;
maxR = (_residual[x] > maxR) ? _residual[x] : maxR;
}
// deltaOld = deltaNew
float deltaOld = deltaNew;
// deltaNew = transpose(r) * r
deltaNew = 0.0f;
for (x = 0; x < _listSize; x++)
deltaNew += _residual[x] * _residual[x];
// beta = deltaNew / deltaOld
float beta = deltaNew / deltaOld;
// d = r + beta * d
for (x = 0; x < _listSize; x++)
_direction[x] = _residual[x] + beta * _direction[x];
// i = i + 1
i++;
}
return i;
}
//////////////////////////////////////////////////////////////////////
// calculate the residuals
//////////////////////////////////////////////////////////////////////
float CG_SOLVER::calcResidual(list<CELL*> cells)
{
float maxResidual = 0.0f;
list<CELL*>::iterator cellIterator = cells.begin();
for (int i = 0; i < _listSize; i++, cellIterator++)
{
CELL* currentCell = *cellIterator;
float dx = _dx[currentCell->depth];
float neighborSum = 0.0f;
for (int x = 0; x < 4; x++)
{
int i = x * 2;
neighborSum += currentCell->neighbors[i]->potential * currentCell->stencil[i];
if (currentCell->neighbors[i+1])
neighborSum += currentCell->neighbors[i+1]->potential * currentCell->stencil[i+1];
}
_residual[i] = currentCell->b - (-neighborSum + currentCell->potential * currentCell->stencil[8]);
if (fabs(_residual[i]) > maxResidual)
maxResidual = fabs(_residual[i]);
}
return maxResidual;
}
//////////////////////////////////////////////////////////////////////
// compute stencils once and store
//////////////////////////////////////////////////////////////////////
void CG_SOLVER::calcStencils(list<CELL*> cells)
{
list<CELL*>::iterator cellIterator = cells.begin();
for (cellIterator = cells.begin(); cellIterator != cells.end(); cellIterator++)
{
CELL* currentCell = *cellIterator;
float invDx = 1.0f / _dx[currentCell->depth];
// sum over faces
float deltaSum = 0.0f;
float bSum = 0.0f;
for (int x = 0; x < 4; x++)
{
int i = x * 2;
currentCell->stencil[i] = 0.0f;
currentCell->stencil[i+1] = 0.0f;
if (currentCell->neighbors[i + 1] == NULL) {
// if it is the same refinement level (case 1)
if (currentCell->depth == currentCell->neighbors[i]->depth) {
deltaSum += invDx;
if (!currentCell->neighbors[i]->boundary)
currentCell->stencil[i] = invDx;
else
bSum += (currentCell->neighbors[i]->potential) * invDx;
}
// else it is less refined (case 3)
else {
deltaSum += 0.5f * invDx;
if (!currentCell->neighbors[i]->boundary)
currentCell->stencil[i] = 0.5f * invDx;
else
bSum += currentCell->neighbors[i]->potential * 0.5f * invDx;
}
}
// if the neighbor is at a lower level (case 2)
else {
deltaSum += 2.0f * invDx;
if (!currentCell->neighbors[i]->boundary)
currentCell->stencil[i] = invDx;
else
bSum += currentCell->neighbors[i]->potential * invDx;
if (!currentCell->neighbors[i+1]->boundary)
currentCell->stencil[i+1] = invDx;
else
bSum += currentCell->neighbors[i+1]->potential * invDx;
}
}
currentCell->stencil[8] = deltaSum;
currentCell->b = bSum;
}
}