관리 메뉴

HAMA 블로그

volumeRender.cpp 본문

CUDA

volumeRender.cpp

[하마] 이승현 (wowlsh93@gmail.com) 2014. 8. 27. 22:43

/*

 * Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.

 *

 * Please refer to the NVIDIA end user license agreement (EULA) associated

 * with this source code for terms and conditions that govern your use of

 * this software. Any use, reproduction, disclosure, or distribution of

 * this software and related documentation outside the terms of the EULA

 * is strictly prohibited.

 *

 */


/*

    Volume rendering sample


    This sample loads a 3D volume from disk and displays it using

    ray marching and 3D textures.


    Note - this is intended to be an example of using 3D textures

    in CUDA, not an optimized volume renderer.


    Changes

    sgg 22/3/2010

    - updated to use texture for display instead of glDrawPixels.

    - changed to render from front-to-back rather than back-to-front.

*/


/*

// OpenGL Graphics includes

#include <GL/glew.h>

#if defined (__APPLE__) || defined(MACOSX)

#include <GLUT/glut.h>

#else

#include <GL/freeglut.h>

#endif


// CUDA Runtime, Interop, and includes

#include <cuda_runtime.h>

#include <cuda_gl_interop.h>

#include <vector_types.h>

#include <vector_functions.h>

#include <driver_functions.h>


// CUDA utilities

#include <helper_cuda.h>

#include <helper_cuda_gl.h>


// Helper functions

#include <helper_cuda.h>

#include <helper_functions.h>

#include <helper_timer.h>


typedef unsigned int uint;

typedef unsigned char uchar;


#define MAX_EPSILON_ERROR 5.00f

#define THRESHOLD         0.30f


// Define the files that are to be save and the reference images for validation

const char *sOriginal[] =

{

    "volume.ppm",

    NULL

};


const char *sReference[] =

{

    "ref_volume.ppm",

    NULL

};


const char *sSDKsample = "CUDA 3D Volume Render";


const char *volumeFilename = "Bucky.raw";

cudaExtent volumeSize = make_cudaExtent(32, 32, 32);

typedef unsigned char VolumeType;


//char *volumeFilename = "mrt16_angio.raw";

//cudaExtent volumeSize = make_cudaExtent(416, 512, 112);

//typedef unsigned short VolumeType;


uint width = 512, height = 512;

dim3 blockSize(16, 16);

dim3 gridSize;


float3 viewRotation;

float3 viewTranslation = make_float3(0.0, 0.0, -4.0f);

float invViewMatrix[12];


float density = 0.05f;

float brightness = 1.0f;

float transferOffset = 0.0f;

float transferScale = 1.0f;

bool linearFiltering = true;


GLuint pbo = 0;     // OpenGL pixel buffer object

GLuint tex = 0;     // OpenGL texture object

struct cudaGraphicsResource *cuda_pbo_resource; // CUDA Graphics Resource (to transfer PBO)


StopWatchInterface *timer = 0;


// Auto-Verification Code

const int frameCheckNumber = 2;

int fpsCount = 0;        // FPS count for averaging

int fpsLimit = 1;        // FPS limit for sampling

int g_Index = 0;

unsigned int frameCount = 0;


int *pArgc;

char **pArgv;


#ifndef MAX

#define MAX(a,b) ((a > b) ? a : b)

#endif


extern "C" void setTextureFilterMode(bool bLinearFilter);

extern "C" void initCuda(void *h_volume, cudaExtent volumeSize);

extern "C" void freeCudaBuffers();

extern "C" void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH,

                              float density, float brightness, float transferOffset, float transferScale);

extern "C" void copyInvViewMatrix(float *invViewMatrix, size_t sizeofMatrix);


void initPixelBuffer();


void computeFPS()

{

    frameCount++;

    fpsCount++;


    if (fpsCount == fpsLimit)

    {

        char fps[256];

        float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);

        sprintf(fps, "Volume Render: %3.1f fps", ifps);


        glutSetWindowTitle(fps);

        fpsCount = 0;


        fpsLimit = (int)MAX(1.f, ifps);

        sdkResetTimer(&timer);

    }

}


// render image using CUDA

void render()

{

    copyInvViewMatrix(invViewMatrix, sizeof(float4)*3);


    // map PBO to get CUDA device pointer

    uint *d_output;

    // map PBO to get CUDA device pointer

    checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));

    size_t num_bytes;

    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_output, &num_bytes,

                                                         cuda_pbo_resource));

    //printf("CUDA mapped PBO: May access %ld bytes\n", num_bytes);


    // clear image

    checkCudaErrors(cudaMemset(d_output, 0, width*height*4));


    // call CUDA kernel, writing results to PBO

    render_kernel(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale);


    getLastCudaError("kernel failed");


    checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

}


// display results using OpenGL (called by GLUT)

void display()

{

    sdkStartTimer(&timer);


    // use OpenGL to build view matrix

    GLfloat modelView[16];

    glMatrixMode(GL_MODELVIEW);

    glPushMatrix();

    glLoadIdentity();

    glRotatef(-viewRotation.x, 1.0, 0.0, 0.0);

    glRotatef(-viewRotation.y, 0.0, 1.0, 0.0);

    glTranslatef(-viewTranslation.x, -viewTranslation.y, -viewTranslation.z);

    glGetFloatv(GL_MODELVIEW_MATRIX, modelView);

    glPopMatrix();


    invViewMatrix[0] = modelView[0];

    invViewMatrix[1] = modelView[4];

    invViewMatrix[2] = modelView[8];

    invViewMatrix[3] = modelView[12];

    invViewMatrix[4] = modelView[1];

    invViewMatrix[5] = modelView[5];

    invViewMatrix[6] = modelView[9];

    invViewMatrix[7] = modelView[13];

    invViewMatrix[8] = modelView[2];

    invViewMatrix[9] = modelView[6];

    invViewMatrix[10] = modelView[10];

    invViewMatrix[11] = modelView[14];


    render();


    // display results

    glClear(GL_COLOR_BUFFER_BIT);


    // draw image from PBO

    glDisable(GL_DEPTH_TEST);


    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

#if 0

    // draw using glDrawPixels (slower)

    glRasterPos2i(0, 0);

    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);

    glDrawPixels(width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0);

    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

#else

    // draw using texture


    // copy from pbo to texture

    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);

    glBindTexture(GL_TEXTURE_2D, tex);

    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0);

    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);


    // draw textured quad

    glEnable(GL_TEXTURE_2D);

    glBegin(GL_QUADS);

glTexCoord2f(0, 0);

glVertex2f(0, 0);

glTexCoord2f(1, 0);

glVertex2f(1, 0);

glTexCoord2f(1, 1);

glVertex2f(1, 1);

glTexCoord2f(0, 1);

glVertex2f(0, 1);

    glEnd();


    glDisable(GL_TEXTURE_2D);

    glBindTexture(GL_TEXTURE_2D, 0);

#endif


    glutSwapBuffers();

    glutReportErrors();


    sdkStopTimer(&timer);


    computeFPS();

}


void idle()

{

    glutPostRedisplay();

}


void keyboard(unsigned char key, int x, int y)

{

    switch (key)

    {

        case 27:

            exit(EXIT_SUCCESS);

            break;


        case 'f':

            linearFiltering = !linearFiltering;

            setTextureFilterMode(linearFiltering);

            break;


        case '+':

            density += 0.01f;

            break;


        case '-':

            density -= 0.01f;

            break;


        case ']':

            brightness += 0.1f;

            break;


        case '[':

            brightness -= 0.1f;

            break;


        case ';':

            transferOffset += 0.01f;

            break;


        case '\'':

            transferOffset -= 0.01f;

            break;


        case '.':

            transferScale += 0.01f;

            break;


        case ',':

            transferScale -= 0.01f;

            break;


        default:

            break;

    }


    printf("density = %.2f, brightness = %.2f, transferOffset = %.2f, transferScale = %.2f\n", density, brightness, transferOffset, transferScale);

    glutPostRedisplay();

}


int ox, oy;

int buttonState = 0;


void mouse(int button, int state, int x, int y)

{

    if (state == GLUT_DOWN)

    {

        buttonState  |= 1<<button;

    }

    else if (state == GLUT_UP)

    {

        buttonState = 0;

    }


    ox = x;

    oy = y;

    glutPostRedisplay();

}


void motion(int x, int y)

{

    float dx, dy;

    dx = (float)(x - ox);

    dy = (float)(y - oy);


    if (buttonState == 4)

    {

        // right = zoom

        viewTranslation.z += dy / 100.0f;

    }

    else if (buttonState == 2)

    {

        // middle = translate

        viewTranslation.x += dx / 100.0f;

        viewTranslation.y -= dy / 100.0f;

    }

    else if (buttonState == 1)

    {

        // left = rotate

        viewRotation.x += dy / 5.0f;

        viewRotation.y += dx / 5.0f;

    }


    ox = x;

    oy = y;

    glutPostRedisplay();

}


int iDivUp(int a, int b)

{

    return (a % b != 0) ? (a / b + 1) : (a / b);

}


void reshape(int w, int h)

{

    width = w;

    height = h;

    initPixelBuffer();


    // calculate new grid size

    gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y));


    glViewport(0, 0, w, h);


    glMatrixMode(GL_MODELVIEW);

    glLoadIdentity();


    glMatrixMode(GL_PROJECTION);

    glLoadIdentity();

    glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

}


void cleanup()

{

    sdkDeleteTimer(&timer);


    freeCudaBuffers();


    if (pbo)

    {

        cudaGraphicsUnregisterResource(cuda_pbo_resource);

        glDeleteBuffersARB(1, &pbo);

        glDeleteTextures(1, &tex);

    }

}


void initGL(int *argc, char **argv)

{

    // initialize GLUT callback functions

    glutInit(argc, argv);

    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);

    glutInitWindowSize(width, height);

    glutCreateWindow("CUDA volume rendering");


    glewInit();


    if (!glewIsSupported("GL_VERSION_2_0 GL_ARB_pixel_buffer_object"))

    {

        printf("Required OpenGL extensions missing.");

        exit(EXIT_SUCCESS);

    }

}


void initPixelBuffer()

{

    if (pbo)

    {

        // unregister this buffer object from CUDA C

        checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_resource));


        // delete old buffer

        glDeleteBuffersARB(1, &pbo);

        glDeleteTextures(1, &tex);

    }


    // create pixel buffer object for display

    glGenBuffersARB(1, &pbo);

    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);

    glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width*height*sizeof(GLubyte)*4, 0, GL_STREAM_DRAW_ARB);

    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);


    // register this buffer object with CUDA

    checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard));


    // create texture for display

    glGenTextures(1, &tex);

    glBindTexture(GL_TEXTURE_2D, tex);

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    glBindTexture(GL_TEXTURE_2D, 0);

}


// Load raw data from disk

void *loadRawFile(char *filename, size_t size)

{

    FILE *fp = fopen(filename, "rb");


    if (!fp)

    {

        fprintf(stderr, "Error opening file '%s'\n", filename);

        return 0;

    }


    void *data = malloc(size);

    size_t read = fread(data, 1, size, fp);

    fclose(fp);


    printf("Read '%s', %d bytes\n", filename, read);


    return data;

}


// General initialization call for CUDA Device

int chooseCudaDevice(int argc, const char **argv, bool bUseOpenGL)

{

    int result = 0;


    if (bUseOpenGL)

    {

        result = findCudaGLDevice(argc, argv);

    }

    else

    {

        result = findCudaDevice(argc, argv);

    }


    return result;

}


void runSingleTest(const char *ref_file, const char *exec_path)

{

    bool bTestResult = true;


    uint *d_output;

    checkCudaErrors(cudaMalloc((void **)&d_output, width*height*sizeof(uint)));

    checkCudaErrors(cudaMemset(d_output, 0, width*height*sizeof(uint)));


    float modelView[16] =

    {

        1.0f, 0.0f, 0.0f, 0.0f,

        0.0f, 1.0f, 0.0f, 0.0f,

        0.0f, 0.0f, 1.0f, 0.0f,

        0.0f, 0.0f, 4.0f, 1.0f

    };


    invViewMatrix[0] = modelView[0];

    invViewMatrix[1] = modelView[4];

    invViewMatrix[2] = modelView[8];

    invViewMatrix[3] = modelView[12];

    invViewMatrix[4] = modelView[1];

    invViewMatrix[5] = modelView[5];

    invViewMatrix[6] = modelView[9];

    invViewMatrix[7] = modelView[13];

    invViewMatrix[8] = modelView[2];

    invViewMatrix[9] = modelView[6];

    invViewMatrix[10] = modelView[10];

    invViewMatrix[11] = modelView[14];


    // call CUDA kernel, writing results to PBO

    copyInvViewMatrix(invViewMatrix, sizeof(float4)*3);


    // Start timer 0 and process n loops on the GPU

    int nIter = 10;


    for (int i = -1; i < nIter; i++)

    {

        if (i == 0)

        {

            cudaDeviceSynchronize();

            sdkStartTimer(&timer);

        }


        render_kernel(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale);

    }


    cudaDeviceSynchronize();

    sdkStopTimer(&timer);

    // Get elapsed time and throughput, then log to sample and master logs

    double dAvgTime = sdkGetTimerValue(&timer)/(nIter * 1000.0);

    printf("volumeRender, Throughput = %.4f MTexels/s, Time = %.5f s, Size = %u Texels, NumDevsUsed = %u, Workgroup = %u\n",

           (1.0e-6 * width * height)/dAvgTime, dAvgTime, (width * height), 1, blockSize.x * blockSize.y);



    getLastCudaError("Error: render_kernel() execution FAILED");

    checkCudaErrors(cudaDeviceSynchronize());


    unsigned char *h_output = (unsigned char *)malloc(width*height*4);

    checkCudaErrors(cudaMemcpy(h_output, d_output, width*height*4, cudaMemcpyDeviceToHost));


    sdkSavePPM4ub("volume.ppm", h_output, width, height);

    bTestResult = sdkComparePPM("volume.ppm", sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, THRESHOLD, true);


    cudaFree(d_output);

    free(h_output);

    cleanup();


    exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);

}


////////////////////////////////////////////////////////////////////////////////

// Program main

////////////////////////////////////////////////////////////////////////////////

int

main(int argc, char **argv)

{

    pArgc = &argc;

    pArgv = argv;


    char *ref_file = NULL;


    //start logs

    printf("%s Starting...\n\n", sSDKsample);


    if (checkCmdLineFlag(argc, (const char **)argv, "file"))

    {

        getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);

        fpsLimit = frameCheckNumber;

    }


    if (ref_file)

    {

        // use command-line specified CUDA device, otherwise use device with highest Gflops/s

        chooseCudaDevice(argc, (const char **)argv, false);

    }

    else

    {

        // First initialize OpenGL context, so we can properly set the GL for CUDA.

        // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.

        initGL(&argc, argv);


        // use command-line specified CUDA device, otherwise use device with highest Gflops/s

        chooseCudaDevice(argc, (const char **)argv, true);

    }


    // parse arguments

    char *filename;


    if (getCmdLineArgumentString(argc, (const char **) argv, "volume", &filename))

    {

        volumeFilename = filename;

    }


    int n;


    if (checkCmdLineFlag(argc, (const char **) argv, "size"))

    {

        n = getCmdLineArgumentInt(argc, (const char **) argv, "size");

        volumeSize.width = volumeSize.height = volumeSize.depth = n;

    }


    if (checkCmdLineFlag(argc, (const char **) argv, "xsize"))

    {

        n = getCmdLineArgumentInt(argc, (const char **) argv, "xsize");

        volumeSize.width = n;

    }


    if (checkCmdLineFlag(argc, (const char **) argv, "ysize"))

    {

        n = getCmdLineArgumentInt(argc, (const char **) argv, "ysize");

        volumeSize.height = n;

    }


    if (checkCmdLineFlag(argc, (const char **) argv, "zsize"))

    {

        n= getCmdLineArgumentInt(argc, (const char **) argv, "zsize");

        volumeSize.depth = n;

    }


    // load volume data

    char *path = sdkFindFilePath(volumeFilename, argv[0]);


    if (path == 0)

    {

        printf("Error finding file '%s'\n", volumeFilename);

        exit(EXIT_FAILURE);

    }


    size_t size = volumeSize.width*volumeSize.height*volumeSize.depth*sizeof(VolumeType);

    void *h_volume = loadRawFile(path, size);


    initCuda(h_volume, volumeSize);

    free(h_volume);


    sdkCreateTimer(&timer);


    printf("Press '+' and '-' to change density (0.01 increments)\n"

           "      ']' and '[' to change brightness\n"

           "      ';' and ''' to modify transfer function offset\n"

           "      '.' and ',' to modify transfer function scale\n\n");


    // calculate new grid size

    gridSize = dim3(iDivUp(width, blockSize.x), iDivUp(height, blockSize.y));


    if (ref_file)

    {

        runSingleTest(ref_file, argv[0]);

    }

    else

    {

        // This is the normal rendering path for VolumeRender

        glutDisplayFunc(display);

        glutKeyboardFunc(keyboard);

        glutMouseFunc(mouse);

        glutMotionFunc(motion);

        glutReshapeFunc(reshape);

        glutIdleFunc(idle);


        initPixelBuffer();


        atexit(cleanup);


        glutMainLoop();

    }


    cudaDeviceReset();

    exit(EXIT_SUCCESS);

}


*/

Comments