309 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			309 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| ///////////////////////////////////////////////////////////////////////////////////
 | |
| // File : CG_SOLVER.cpp
 | |
| ///////////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| // LumosQuad - A Lightning Generator
 | |
| // Copyright 2007
 | |
| // The University of North Carolina at Chapel Hill
 | |
| // 
 | |
| ///////////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| //  This program is free software; you can redistribute it and/or modify
 | |
| //  it under the terms of the GNU General Public License as published by
 | |
| //  the Free Software Foundation; either version 2 of the License, or
 | |
| //  (at your option) any later version.
 | |
| //
 | |
| //  This program is distributed in the hope that it will be useful,
 | |
| //  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| //  GNU General Public License for more details.
 | |
| // 
 | |
| //  You should have received a copy of the GNU General Public License
 | |
| //  along with this program; if not, write to the Free Software
 | |
| //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| //
 | |
| //  The University of North Carolina at Chapel Hill makes no representations 
 | |
| //  about the suitability of this software for any purpose. It is provided 
 | |
| //  "as is" without express or implied warranty.
 | |
| //
 | |
| //  Permission to use, copy, modify and distribute this software and its
 | |
| //  documentation for educational, research and non-profit purposes, without
 | |
| //  fee, and without a written agreement is hereby granted, provided that the
 | |
| //  above copyright notice and the following three paragraphs appear in all
 | |
| //  copies.
 | |
| //
 | |
| //  THE UNIVERSITY OF NORTH CAROLINA SPECIFICALLY DISCLAIM ANY WARRANTIES,
 | |
| //  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 | |
| //  FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN
 | |
| //  "AS IS" BASIS, AND THE UNIVERSITY OF NORTH CAROLINA HAS NO OBLIGATION TO
 | |
| //  PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 | |
| //
 | |
| //  Please send questions and comments about LumosQuad to kim@cs.unc.edu.
 | |
| //
 | |
| ///////////////////////////////////////////////////////////////////////////////////
 | |
| //
 | |
| //  This program uses OpenEXR, which has the following restrictions:
 | |
| // 
 | |
| //  Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
 | |
| //  Digital Ltd. LLC
 | |
| // 
 | |
| //  All rights reserved.
 | |
| // 
 | |
| //  Redistribution and use in source and binary forms, with or without
 | |
| //  modification, are permitted provided that the following conditions are
 | |
| //  met:
 | |
| //  *       Redistributions of source code must retain the above copyright
 | |
| //  notice, this list of conditions and the following disclaimer.
 | |
| //  *       Redistributions in binary form must reproduce the above
 | |
| //  copyright notice, this list of conditions and the following disclaimer
 | |
| //  in the documentation and/or other materials provided with the
 | |
| //  distribution.
 | |
| //  *       Neither the name of Industrial Light & Magic nor the names of
 | |
| //  its contributors may be used to endorse or promote products derived
 | |
| //  from this software without specific prior written permission. 
 | |
| // 
 | |
| 
 | |
| #include "CG_SOLVER.h"
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| // Construction/Destruction
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| 
 | |
| CG_SOLVER::CG_SOLVER(int maxDepth, int iterations, int digits) :
 | |
|   _iterations(iterations), 
 | |
|   _arraySize(0), _listSize(0), _digits(digits),
 | |
|   _direction(NULL), _residual(NULL), _q(NULL), _potential(NULL)
 | |
| {
 | |
|   // compute the physical size of various grid cells
 | |
|   _dx = new float[maxDepth + 1];
 | |
|   _dx[0] = 1.0f;
 | |
|   for (int x = 1; x <= maxDepth; x++)
 | |
|     _dx[x] = _dx[x - 1] * 0.5f;
 | |
| }
 | |
| 
 | |
| CG_SOLVER::~CG_SOLVER()
 | |
| {
 | |
|   if (_direction) delete[] _direction;
 | |
|   if (_residual) delete[] _residual;
 | |
|   if (_potential) delete[] _potential;
 | |
|   if (_q) delete[] _q;
 | |
| }
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| // reallocate scratch arrays if necessary
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| void CG_SOLVER::reallocate()
 | |
| {
 | |
|   // if we have enough size already, return
 | |
|   if (_arraySize >= _listSize) return;
 | |
| 
 | |
|   // made sure it SSE aligns okay
 | |
|   _arraySize = _listSize * 2;
 | |
|   if (_arraySize % 4)
 | |
|     _arraySize += 4 - _arraySize % 4;
 | |
|  
 | |
|   // delete the old ones
 | |
|   if (_direction) delete[] _direction;
 | |
|   if (_residual) delete[] _residual;
 | |
|   if (_q) delete[] _q;
 | |
| 
 | |
|   // allocate the new ones
 | |
|   _direction = new float[_arraySize];
 | |
|   _residual = new float[_arraySize];
 | |
|   _q = new float[_arraySize];
 | |
| 
 | |
|   // wipe the new ones
 | |
|   for (int x = 0; x < _arraySize; x++)
 | |
|     _direction[x] = _residual[x] = _q[x] = 0.0f;
 | |
| }
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| // conjugate gradient solver
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| int CG_SOLVER::solve(list<CELL*> cells)
 | |
| {
 | |
|   // counters
 | |
|   int x, y, index;
 | |
|   list<CELL*>::iterator cellIterator;
 | |
| 
 | |
|   // i = 0
 | |
|   int i = 0;
 | |
| 
 | |
|   // precalculate stencils
 | |
|   calcStencils(cells);
 | |
|  
 | |
|   // reallocate scratch arrays if necessary
 | |
|   _listSize = cells.size();
 | |
|   reallocate();
 | |
|  
 | |
|   // compute a new lexicographical order
 | |
|   cellIterator = cells.begin();
 | |
|   for (x = 0; x < _listSize; x++, cellIterator++)
 | |
|     (*cellIterator)->index = x;
 | |
|   
 | |
|   // r = b - Ax
 | |
|   calcResidual(cells);
 | |
| 
 | |
|   // copy residual into easy array
 | |
|   // d = r
 | |
|   cellIterator = cells.begin();
 | |
|   float deltaNew = 0.0f;
 | |
|   for (x = 0; x < _listSize; x++, cellIterator++)
 | |
|   {
 | |
|     _direction[x] = _residual[x];
 | |
|     deltaNew += _residual[x] * _residual[x];
 | |
|   }
 | |
| 
 | |
|   // delta0 = deltaNew
 | |
|   float delta0 = deltaNew;
 | |
|  
 | |
|   // While deltaNew > (eps^2) * delta0
 | |
|   float eps  = pow(10.0f, (float)-_digits);
 | |
|   float maxR = 2.0f * eps;
 | |
|   while ((i < _iterations) && (maxR > eps))
 | |
|   {
 | |
|     // q = Ad
 | |
|     cellIterator = cells.begin();
 | |
|     for (y = 0; y < _listSize; y++, cellIterator++)
 | |
|     {
 | |
|       CELL* currentCell = *cellIterator;
 | |
|       CELL** neighbors = currentCell->neighbors;
 | |
| 
 | |
|       float neighborSum = 0.0f;
 | |
|       for (int x = 0; x < 4; x++)
 | |
|       {
 | |
|         int j = x * 2;
 | |
|         neighborSum += _direction[neighbors[j]->index] * currentCell->stencil[j];
 | |
|         if (neighbors[j+1])
 | |
|           neighborSum += _direction[neighbors[j+1]->index] * currentCell->stencil[j+1];
 | |
|       }
 | |
|       _q[y] = -neighborSum + _direction[y] * currentCell->stencil[8];
 | |
|     }
 | |
|     // alpha = deltaNew / (transpose(d) * q)
 | |
|     float alpha = 0.0f;
 | |
|     for (x = 0; x < _listSize; x++)
 | |
|       alpha += _direction[x] * _q[x];
 | |
|     if (fabs(alpha) > 0.0f)
 | |
|       alpha = deltaNew / alpha;
 | |
| 
 | |
|     // x = x + alpha * d
 | |
|     cellIterator = cells.begin();
 | |
|     for (x = 0; x < _listSize; x++, cellIterator++)
 | |
|       (*cellIterator)->potential += alpha * _direction[x];
 | |
| 
 | |
|     // r = r - alpha * q
 | |
|     maxR = 0.0f;
 | |
|     for (x = 0; x < _listSize; x++)
 | |
|     {
 | |
|       _residual[x] -= _q[x] * alpha;
 | |
|       maxR = (_residual[x] > maxR) ? _residual[x] : maxR;
 | |
|     }
 | |
|     
 | |
|     // deltaOld = deltaNew
 | |
|     float deltaOld = deltaNew;
 | |
| 
 | |
|     // deltaNew = transpose(r) * r
 | |
|     deltaNew = 0.0f;
 | |
|     for (x = 0; x < _listSize; x++)
 | |
|       deltaNew += _residual[x] * _residual[x];
 | |
| 
 | |
|     // beta = deltaNew / deltaOld
 | |
|     float beta = deltaNew / deltaOld;
 | |
| 
 | |
|     // d = r + beta * d
 | |
|     for (x = 0; x < _listSize; x++)
 | |
|       _direction[x] = _residual[x] + beta * _direction[x];
 | |
| 
 | |
|     // i = i + 1
 | |
|     i++;
 | |
|   }
 | |
| 
 | |
|   return i;
 | |
| }
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| // calculate the residuals
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| float CG_SOLVER::calcResidual(list<CELL*> cells)
 | |
| {
 | |
|   float maxResidual = 0.0f;
 | |
|   
 | |
|   list<CELL*>::iterator cellIterator = cells.begin();
 | |
|   for (int i = 0; i < _listSize; i++, cellIterator++)
 | |
|   {
 | |
|     CELL* currentCell = *cellIterator;
 | |
|     float dx = _dx[currentCell->depth];
 | |
|     float neighborSum = 0.0f;
 | |
|     
 | |
|     for (int x = 0; x < 4; x++)
 | |
|     {
 | |
|       int i = x * 2;
 | |
|       neighborSum += currentCell->neighbors[i]->potential * currentCell->stencil[i];
 | |
|       if (currentCell->neighbors[i+1])
 | |
|         neighborSum += currentCell->neighbors[i+1]->potential * currentCell->stencil[i+1];
 | |
|     }
 | |
|     _residual[i] = currentCell->b - (-neighborSum + currentCell->potential * currentCell->stencil[8]);
 | |
|     
 | |
|     if (fabs(_residual[i]) > maxResidual)
 | |
|       maxResidual = fabs(_residual[i]);
 | |
|   }
 | |
|   return maxResidual;
 | |
| }
 | |
| 
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| // compute stencils once and store
 | |
| //////////////////////////////////////////////////////////////////////
 | |
| void CG_SOLVER::calcStencils(list<CELL*> cells)
 | |
| {
 | |
|   list<CELL*>::iterator cellIterator = cells.begin();
 | |
|   for (cellIterator = cells.begin(); cellIterator != cells.end(); cellIterator++)
 | |
|   {
 | |
|     CELL* currentCell = *cellIterator;
 | |
|     float invDx = 1.0f / _dx[currentCell->depth];
 | |
| 
 | |
|     // sum over faces
 | |
|     float deltaSum = 0.0f;
 | |
|     float bSum = 0.0f;
 | |
| 
 | |
|     for (int x = 0; x < 4; x++)
 | |
|     {
 | |
|       int i = x * 2;
 | |
|       currentCell->stencil[i] = 0.0f;
 | |
|       currentCell->stencil[i+1] = 0.0f;
 | |
|       
 | |
|       if (currentCell->neighbors[i + 1] == NULL) {
 | |
|         // if it is the same refinement level (case 1)
 | |
|         if (currentCell->depth == currentCell->neighbors[i]->depth) {
 | |
|           deltaSum += invDx;
 | |
|           if (!currentCell->neighbors[i]->boundary)
 | |
|             currentCell->stencil[i] = invDx;
 | |
|           else
 | |
|             bSum += (currentCell->neighbors[i]->potential) * invDx;
 | |
|         }
 | |
|         // else it is less refined (case 3)
 | |
|         else {
 | |
|           deltaSum += 0.5f * invDx;
 | |
|           if (!currentCell->neighbors[i]->boundary)
 | |
|             currentCell->stencil[i] = 0.5f * invDx;
 | |
|           else
 | |
|             bSum += currentCell->neighbors[i]->potential * 0.5f * invDx;
 | |
|         }
 | |
|       }
 | |
|       // if the neighbor is at a lower level (case 2)
 | |
|       else {
 | |
|         deltaSum += 2.0f * invDx;
 | |
|         if (!currentCell->neighbors[i]->boundary)
 | |
|           currentCell->stencil[i] = invDx;
 | |
|         else
 | |
|           bSum += currentCell->neighbors[i]->potential * invDx;
 | |
|         if (!currentCell->neighbors[i+1]->boundary)
 | |
|           currentCell->stencil[i+1] = invDx;
 | |
|         else
 | |
|           bSum += currentCell->neighbors[i+1]->potential * invDx;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     currentCell->stencil[8] = deltaSum;
 | |
|     currentCell->b = bSum;
 | |
|   }
 | |
| }
 |