#include "FragmentProgramARB10.h"

#include <stdio.h>
#include <string.h>

//using namespace std;

#if defined(__APPLE__)
  // OS X
  #define GL_EXT_vertex_shader 1
  #define GL_GLEXT_FUNCTION_POINTERS 1
  #include <OpenGL/gl.h>
  #include <OpenGL/glu.h>
  #include <OpenGL/glext.h>
  #include "extensions.h"
#elif defined(sgi)
  // IRIX
  #include <GL/gl.h>
  #include <GL/glu.h>
  #include "extensions.h"
  //#include <GL/glext.h>
#elif defined(__linux__)
  // LINUX
  #include <GL/gl.h>
  #include <GL/glu.h>
  #include <GL/glext.h>
  #include "extensions.h"
#else
  // WIN32
  #define DECLARE_EXTENSION_SUBSTANCE
  #include "extensions.h"
  #include "glext.h"
#endif

#if __APPLE__
// Function pointers for the ARB program entry points on OS X. They are only
// declared here; the definitions live outside this file.
extern glGetProgramivARBProcPtr			pfglGetProgramivARB;
extern glGenProgramsARBProcPtr			pfglGenProgramsARB;
extern glBindProgramARBProcPtr			pfglBindProgramARB;
// NOTE(review): this pointer is declared with the glGetProgramivARB pointer
// type. That looks like a copy/paste slip (glProgramStringARBProcPtr would be
// the expected type) -- confirm against the definition before changing it.
extern glGetProgramivARBProcPtr			pfglProgramStringARB;
#endif

// Prologue of the generic fragment program: the "!!ARBfp1.0" header, the
// output/temporary declarations and the load of the per-fragment constant c
// from texture coordinate set 0. It is the default target of fragProgram0_,
// which initializeGPU_FP() copies first when assembling the program text.
// The complex value is carried in the x (real) and w (imaginary) components.
unsigned char fragProgram0STD_[] = {
	// Standard fragment program header
	"!!ARBfp1.0\n"
//  "PARAM C0 = { 1.0, 2.0, 1.0, 1.0 };"
//  "PARAM C1 = { 1.0, 1.0, 1.0, 1.0 };"
  "OUTPUT	outcolor = result.color;"
  "TEMP	R0, R1, R2;"
  "MOV R0, fragment.texcoord[0];"  // R0 = z = { cx, ?, ?, cy } (iteration starts at z = c)
//  "MUL R2, R0, C1;"                // disabled: would have scaled the constant by C1
  "MOV R2, R0;"                // R2 = c = { cx, ?, ?, cy }, the additive constant of every step
};

// One iteration step z <- z*z + c of the generic program (z in R0, c in R2,
// real part in x, imaginary part in w). It is the default target of
// fragProgram1_, which initializeGPU_FP() appends once per replication.
unsigned char fragProgram1STD_[] = {
  "MOV R0.z, -R0.w;"                   // R0 = { zx, ?, -zy, zy }
  "MAD R1, R0.xyzw, R0.xxxx, R2;"     // R1 = R0*zx + c = { zx*zx + cx, ?, -zx*zy + R2.z, zx*zy + cy }
  "MAD R0.xyw, R0.wwwz, -R0.wwwx, R1.xyzw;"      // R0.x = zx*zx - zy*zy + cx, R0.w = (-zy)*(-zx) + R1.w = 2*zx*zy + cy
};

// Epilogue of the generic program: writes the output colour from the register
// state left by the last iteration step and terminates the program text.
// It is the default target of fragProgram2_.
unsigned char fragProgram2STD_[] = {
  //"MOV outcolor.xyz, R0;"
  "MAD outcolor.xyz, R0, R1, R2;"  // outcolor.xyz = R0*R1 + R2, component-wise
	"END"
};

// Program fragments (prologue, repeated body, epilogue) that
// initializeGPU_FP() concatenates into the final program text. They start out
// pointing at the generic variants; initializeGPU_FP() repoints them to the
// R300 variants when the GL vendor string contains "ATI".
unsigned char* fragProgram0_ = fragProgram0STD_;
unsigned char* fragProgram1_ = fragProgram1STD_;
unsigned char* fragProgram2_ = fragProgram2STD_;

// Test texture: 4x4 texels, three bytes (R, G, B) per texel, one row of four
// texels per source line. createTexture() uploads it, but the only call to
// createTexture() in this file is commented out, so it is not yet used.
// The pattern alternates black and white; the first texel of the second row
// is magenta (255,0,255).
const unsigned char textureImage_[4*4*3] = {
  0,0,0, 255,255,255, 0,0,0, 255,255,255,
  255,0,255, 0,0,0, 255,255,255, 0,0,0,
  0,0,0, 255,255,255, 0,0,0, 255,255,255,
  255,255,255, 0,0,0, 255,255,255, 0,0,0,
};


// Benjamin Lipchak swizzle optimizations.

// GPU Fragment Program - D.Paccaloni & B.Lipchak -------------------
// The Radeon FP compiler seems to have some problems with complex swizzles, eating up too many ALU slots.
// That's why we have to use 3 instructions per iteration :(
// Note that for Fragment Programs to work, we don't have to enable GL_TEXTURE_2D !

// Prologue of the R300 variant: header, declarations, load of the constant c
// into R2, followed by the first two iteration steps z <- z*z + c.
// Each step uses three MADs and alternates between R3 and R0 as destination;
// the real part lives in x and the imaginary part in w.
unsigned char fragProgram0R300_[] = {
	// Standard fragment program header
  "!!ARBfp1.0\n"
  "OUTPUT	outcolor = result.color;\n"
  "TEMP	R0, R1, R2, R3;\n"
  "MOV R2, fragment.texcoord[0];\n"  // R2 = c = { cx, ?, ?, cy }
  "\n"
  // Step 1: z is still equal to c, so it is read from R2; result goes to R3.
  "MAD R1, R2.xyzw, R2.xxxx, R2;\n"     // R1 = z*zx + c = { zx*zx + cx, ?, ?, zx*zy + cy }
  "MAD R3.xy, R2.wwww, -R2.wwww, R1.xyzw;\n"      // R3.x = zx*zx - zy*zy + cx (R3.y is a by-product)
  "MAD R3.w, -R2.wwww, -R2.xxxx, R1.wwww;\n"      // R3.w = zy*zx + R1.w = 2*zx*zy + cy
  "\n"
  // Step 2: z is read from R3; result goes to R0.
  "MAD R1, R3.xyzw, R3.xxxx, R2;\n"     // R1 = z*zx + c = { zx*zx + cx, ?, ?, zx*zy + cy }
  "MAD R0.xy, R3.wwww, -R3.wwww, R1.xyzw;\n"      // R0.x = zx*zx - zy*zy + cx (R0.y is a by-product)
  "MAD R0.w, -R3.wwww, -R3.xxxx, R1.xyzw;\n"      // R0.w = 2*zx*zy + cy (only R1.w is read because of the .w write mask)
  "\n"
};

// Repeated body of the R300 variant: two iteration steps z <- z*z + c per
// copy (R0 -> R3, then R3 -> R0), with c in R2, the real part in x and the
// imaginary part in w. initializeGPU_FP() appends one copy per replication.
unsigned char fragProgram1R300_[] = {
  // First step: z is read from R0; result goes to R3.
  "MAD R1, R0.xyzw, R0.xxxx, R2;\n"     // R1 = z*zx + c = { zx*zx + cx, ?, ?, zx*zy + cy }
  "MAD R3.xy, R0.wwww, -R0.wwww, R1.xyzw;\n"      // R3.x = zx*zx - zy*zy + cx (R3.y is a by-product)
  "MAD R3.w, -R0.wwww, -R0.xxxx, R1.wwww;\n"      // R3.w = zy*zx + R1.w = 2*zx*zy + cy
  "\n"
  // Second step: z is read from R3; result goes back to R0.
  "MAD R1, R3.xyzw, R3.xxxx, R2;\n"     // R1 = z*zx + c = { zx*zx + cx, ?, ?, zx*zy + cy }
  "MAD R0.xy, R3.wwww, -R3.wwww, R1.xyzw;\n"      // R0.x = zx*zx - zy*zy + cx (R0.y is a by-product)
  "MAD R0.w, -R3.wwww, -R3.xxxx, R1.xyzw;\n"      // R0.w = 2*zx*zy + cy (only R1.w is read because of the .w write mask)
  "\n"
};

// Epilogue of the R300 variant: two final iteration steps, then the output
// colour is written from the resulting register state and the program text is
// terminated. c is in R2, the real part in x and the imaginary part in w.
unsigned char fragProgram2R300_[] = {
  // First step: z is read from R0; result goes to R3.
  "MAD R1, R0.xyzw, R0.xxxx, R2;\n"     // R1 = z*zx + c = { zx*zx + cx, ?, ?, zx*zy + cy }
  "MAD R3.xy, R0.wwww, -R0.wwww, R1.xyzw;\n"      // R3.x = zx*zx - zy*zy + cx (R3.y is a by-product)
  "MAD R3.w, -R0.wwww, -R0.xxxx, R1.xyzw;\n"      // R3.w = 2*zx*zy + cy (only R1.w is read because of the .w write mask)
  "\n"
  "MOV R3.z, -R3.w;\n"                   // R3 = { zx, ?, -zy, zy }
  "MOV R0.z, -R3.w;\n"                   // R0.z = -zy, taken from the value just stored in R3
  // Second step: z is read from R3; only R0.xy is updated, R0.w keeps its earlier value.
  "MAD R1, R3.xyzw, R3.xxxx, R2;\n"     // R1 = z*zx + c = { zx*zx + cx, ?, -zx*zy + R2.z, zx*zy + cy }
  "MAD R0.xy, R3.wwww, -R3.wwww, R1.xyzw;\n"      // R0.x = zx*zx - zy*zy + cx (R0.y is a by-product)
  "\n"
  "MAD outcolor.xyz, R0, R1, R2;\n"  // outcolor.xyz = R0*R1 + R2, component-wise
	"END\n"
};


// Static scratch buffer (640*1024 bytes) into which initializeGPU_FP()
// assembles the complete fragment program text before handing it to
// glProgramStringARB().
unsigned char fragProgram_[640*1024]; // 640 KB will be enough for everyone ... :)

// Constructs the ARB 1.0 fragment program renderer.
// All arguments are forwarded unchanged to the FragmentProgram base class and
// then to initialize(); whether initialization succeeded is stored in isValid_.
FragmentProgramARB10::FragmentProgramARB10(int iters, int w, int h,
                                           double ax, double ay,
                                           double ex, double ey)
  : FragmentProgram(iters, w, h, ax, ay, ex, ey)
{
  // Begin on the generic code path.
  usingR3xx_ = false;

  // Historical notes carried over from earlier revisions:
  //  - This code will work on PowerPC and Intel Macs.
  //  - The graphics are incorrectly colored on PowerPC Macs (possibly an
  //    endian issue).
  //  - The class used to bail out early on __APPLE__ because extension
  //    functions obtained through NSGLGetProcAddress() crashed; that early-out
  //    is no longer compiled.
  const bool initialized = initialize(iters, w, h, ax, ay, ex, ey);
  isValid_ = initialized;
}

// Tears down the GL state owned by this object. Nothing is done when
// initialization did not succeed, and nothing at all on sgi builds.
FragmentProgramARB10::~FragmentProgramARB10(void) {
#if !defined(sgi)
  if (!isValid_) {
    return;
  }

  // Switch the fragment program and texturing off, then release the program object.
  glDisable(GL_FRAGMENT_PROGRAM_ARB);
  glDisable(GL_TEXTURE_2D);
  glDeleteProgramsARB(1, &fpid_);
#endif
}

// Initializes the renderer by delegating to the GPU fragment program setup.
// Returns whatever initializeGPU_FP() reports.
bool FragmentProgramARB10::initialize(int iters, int w, int h, double ax, double ay, double ex, double ey) {
  const bool gpuReady = initializeGPU_FP(iters, w, h, ax, ay, ex, ey);
  return gpuReady;
}

bool FragmentProgramARB10::initializeGPU_FP(int iters, int w, int h, double ax, double ay, double ex, double ey) {
#if !defined(sgi)
  // Initialize Fragment Program
  iters_ = iters;
  InitExtensionsARB();
  if (!checkRequiredExtensions()) {
    printf("ARB Fragment Program: Required extensions are not supported.\n");
    return false;
  }
  // Check GPU limits
  printf("  Maximum number of FP ALU instructions: ");
  glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_ALU_INSTRUCTIONS_ARB, &maxInstr_);
  if (glGetError() == GL_NO_ERROR) printf("%d\n", maxInstr_);
  else printf("UNKNOWN\n");
  GLint maxLocalConsts;
  printf("  Maximum number of FP native params: ");
  glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_PARAMETERS_ARB, &maxLocalConsts);
  if (glGetError() == GL_NO_ERROR) printf("%d\n", maxLocalConsts);
  else printf("UNKNOWN\n");
  // Check if GL impl is ATI's
  if (strstr((char*)glGetString(GL_VENDOR), "ATI")) {
    // Running on an ATI R3xx or better.
    // Enable R3xx optimizations.
    printf("  Running on an ATI R3xx or better.\n");
    printf("  Enabling optimized scheduling (iters count must be even).\n");
    usingR3xx_ = true;
    fragProgram0_ = fragProgram0R300_;
    fragProgram1_ = fragProgram1R300_;
    fragProgram2_ = fragProgram2R300_;
    if (iters < 4) {
      iters = 4;
      printf("  WARNING: R3xx code requires min 4 iters.\n");
    }
  }
  //GLint maxLocals;
  //glGetIntegerv(GL_MAX_OPTIMIZED_VERTEX_SHADER_LOCALS_EXT , &maxLocals);
  //cout << "  Maximum number of FP locals: " << maxLocals << endl;
  // Generate Vertex Program for required number of iters
  char* vpPrt = (char*) fragProgram_;
  int len = sprintf(vpPrt, "%s", fragProgram0_);
  vpPrt += len;
  //iters = 22;
  int repliCount = iters;
  if (usingR3xx_) repliCount = (iters-4)/2;
  for (unsigned int i=0; i<repliCount; i++) {
    len = sprintf(vpPrt, "%s", fragProgram1_);
    vpPrt += len;
  }
  len = sprintf(vpPrt, "%s", fragProgram2_);
  vpPrt += len;
  // Generate a new fragment program
  glGenProgramsARB(1, &fpid_);
  if (glGetError() != GL_NO_ERROR) {
    printf("ERROR: glGenProgramsARB FAILED!\n");
    return false;
  }
  //Bind our program
  //glBindProgramNV(GL_VERTEX_PROGRAM_NV, vpid_);
  glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, fpid_);
  if (glGetError() != GL_NO_ERROR) {
    printf("ERROR: glBindProgramsARB FAILED!\n");
    return false;
  }
  //Load our program(enum, vertex program id, length of program, program text)
  //glLoadProgramNV(GL_VERTEX_PROGRAM_NV, vpid_, strlen((char*)vertexProgram_), (unsigned char*)vertexProgram_);
  GLsizei mystrlen = (GLsizei)(strlen((char*)fragProgram_));
  
  glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, mystrlen, fragProgram_);
  if (glGetError() != GL_NO_ERROR) {
    printf("ERROR: glProgramStringARB FAILED!:\n");
    printf("%s\n", (char const*)glGetString(GL_PROGRAM_ERROR_STRING_ARB));
    return false;
  }

  // Check that FP is native
  GLint errorPos;
  GLint isNative;
  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorPos);
  glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &isNative);
  if ((errorPos == -1) && (isNative == 1)) {
    printf("  FP is hardware native (");
    // Get number of ALU instructions used
    GLint usedALUs;
    glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, &usedALUs);
    if (glGetError() == GL_NO_ERROR) printf("%d", usedALUs);
    else printf("???");
    printf(" ALU instructions).\n");
  }
  else {
    printf("WARNING: FP is NOT hardware native ! (HW limit exceeded)\n");
  }

  // Place constants into GPU Constant Memory
  //glProgramParameter4fNV(GL_FRAGMENT_PROGRAM_ARB, 0, 1.0f, -1.0f, 0.0f, 0.5f);
  //glProgramParameter4fNV(GL_FRAGMENT_PROGRAM_ARB, 1, 0.2f, 1.0f, 0.4f, 0.0f);
  //glProgramParameter4fNV(GL_FRAGMENT_PROGRAM_ARB, 2, 10.0f, 10.0f, 10.0f, 10.0f);
  this->prepareWorldSpace(w, h, ax, ay, ex, ey);
  
  // Create texture
  //createTexture();
  // Enable Vertex Programs
  glEnable(GL_FRAGMENT_PROGRAM_ARB);
  return true;
#else
  return false;
#endif
}

// Resets the projection matrix and installs a 2D orthographic projection that
// maps the requested world-space window onto the viewport.
// The world-space size of a pixel is derived from the horizontal extent
// (ax..ex over w pixels) and reused vertically, so ey is not consulted.
// Does nothing on sgi builds.
void FragmentProgramARB10::prepareWorldSpace(int w, int h, double ax, double ay, double ex, double ey) {
#if !defined(sgi)
  // Recalc real plane parameters: one step per pixel, same in both directions.
  const double pixelStep = (ex - ax) / ((double) w);
  const double farEdgeY = ay + pixelStep * (double)h;

  //glMatrixMode(GL_MODELVIEW);
  //glOrtho(0, 1, 1, 0, -1, 1);

  glMatrixMode(GL_PROJECTION);
  glLoadIdentity();

  //glEnable(GL_TEXTURE_2D);
  glEnable(GL_FRAGMENT_PROGRAM_ARB);

  // "bottom" is the far edge and "top" is ay, as in the original call.
  this->setOrtho2D(ax, ex, farEdgeY, ay);
#endif
}

// Multiplies the current matrix by a 2D orthographic projection with the
// given clipping planes by calling gluOrtho2D(). Always returns true.
bool FragmentProgramARB10::setOrtho2D(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top)
{
  //glTrackMatrixNV(GL_VERTEX_PROGRAM_NV, 4, GL_PROJECTION, GL_IDENTITY_NV);
  gluOrtho2D(left, right, bottom, top);
  return true;
}

// Reports whether the one extension this class depends on,
// GL_ARB_fragment_program, is available according to CheckExtension().
bool FragmentProgramARB10::checkRequiredExtensions(void) {
  return !CheckExtension("GL_ARB_fragment_program") ? false : true;
}

bool FragmentProgramARB10::createTexture(void) {
  glGenTextures(1, &texid_);
  glBindTexture(GL_TEXTURE_2D, texid_);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  glTexImage2D(GL_TEXTURE_2D, 0, 3, /*w*/4, /*h*/4, 0, GL_RGB, GL_UNSIGNED_BYTE, textureImage_);
  return true;
}

