/*
A screen layer contains 768 tiles (32*24), requiring 24KB. Each tile is 32 bytes,
holding 2 pixels per byte. The upper four bits of a tile byte represent the
colour of the right pixel, and the lower four bits the colour of the left pixel.
The colour is a 4-bit reference into a 16-colour RGB15 palette.

Screen memory contains four layers per screen: the visible bg and fg layers
that the BG control registers point at, and a second bg/fg pair that acts as
the back buffer. All drawing goes to the back buffer; flip_buffer() copies it
over the visible layers. Layers are addressed as u16*, so each 16-bit word
holds four pixels (half of an 8-pixel tile row).

The tile map is shared by the foreground and background layers, and is stored
just past the first layer for both screens (at the 24KB offset). Each entry is
a 16-bit tile index (low 10 bits are tile index, then h-flip, then v-flip, then
4-bit palette identifier).
*/

#include <nds.h>
#include "screen.h"
#include "../bang.h"

// Layer, map and palette pointers for the main 2D engine. The *_VIS tile
// slots are the ones init_screens() points the BG control registers at; the
// plain slots are the back buffers that the draw routines write to and that
// flip_buffer() copies into the visible slots.
Screen scr_main = {
    .bgv  = BG_TILE_RAM(BG_SLOT_VIS),  // visible background tiles
    .fgv  = BG_TILE_RAM(FG_SLOT_VIS),  // visible foreground tiles
    .bg   = BG_TILE_RAM(BG_SLOT),      // background back buffer
    .fg   = BG_TILE_RAM(FG_SLOT),      // foreground back buffer
    .map  = BG_MAP_RAM(MAP_SLOT),      // tile map shared by both layers
    .palv = BG_PALETTE,                // palette memory read by the hardware
};
// Layer, map and palette pointers for the sub 2D engine. Same slot layout as
// the main engine: the *_VIS tile slots are displayed, the plain slots are the
// back buffers that flip_buffer() copies into the visible slots.
Screen scr_sub = {
    .bgv  = BG_TILE_RAM_SUB(BG_SLOT_VIS),  // visible background tiles
    .fgv  = BG_TILE_RAM_SUB(FG_SLOT_VIS),  // visible foreground tiles
    .bg   = BG_TILE_RAM_SUB(BG_SLOT),      // background back buffer
    .fg   = BG_TILE_RAM_SUB(FG_SLOT),      // foreground back buffer
    .map  = BG_MAP_RAM_SUB(MAP_SLOT),      // tile map shared by both layers
    .palv = BG_PALETTE_SUB,                // palette memory read by the hardware
};

// TODO: Make an enum thing for main/sub, combine these functions
// Bind a screen device to the main engine: point it at scr_main, copy the
// device's 16 palette entries into that screen's pending palette, and set
// the device's wake flag.
void scr_make_main(ScreenDevice *scr) {
    scr->nds = &scr_main;

    int slot = 0;
    while (slot < 16) {
        scr->nds->pal[slot] = scr->palette[slot];
        slot++;
    }

    scr->wake = true;
}

// Bind a screen device to the sub engine: point it at scr_sub, copy the
// device's 16 palette entries into that screen's pending palette, and set
// the device's wake flag.
void scr_make_sub(ScreenDevice *scr) {
    scr->nds = &scr_sub;

    int slot = 0;
    while (slot < 16) {
        scr->nds->pal[slot] = scr->palette[slot];
        slot++;
    }

    scr->wake = true;
}

// Detach a screen device from whichever physical screen it is bound to.
// The screen is blacked out before the binding is cleared. Does nothing if
// the device is not currently bound.
void scr_unmake(ScreenDevice *scr) {
    if (!scr->nds) {
        return;
    }
    black_screen(scr->nds);
    scr->nds = NULL;
}

// One-time video setup for both 2D engines: enable BG0/BG1 in mode 0, map
// VRAM banks A and C as background memory, configure both layers as 16-colour
// tiled backgrounds, and fill the shared tile maps with an identity mapping.
void init_screens(void) {
    /* Allocate VRAM for screens. */
    videoSetMode(DISPLAY_BG0_ACTIVE | DISPLAY_BG1_ACTIVE | MODE_0_2D);
    vramSetBankA(VRAM_A_MAIN_BG);
    videoSetModeSub(DISPLAY_BG0_ACTIVE | DISPLAY_BG1_ACTIVE | MODE_0_2D);
    vramSetBankC(VRAM_C_SUB_BG);

    /* Configure screen layers to use tile graphics. BG0 is the background
       layer and BG1 the foreground layer; BG1 gets the numerically lower
       priority value so that it is drawn over BG0. Both layers read their
       tiles from the visible slots and share a single tile map. */
    REG_BG0CNT     = BG_32x32 | BG_COLOR_16 | BG_PRIORITY_3 | BG_TILE_BASE(BG_SLOT_VIS) | BG_MAP_BASE(MAP_SLOT);
    REG_BG1CNT     = BG_32x32 | BG_COLOR_16 | BG_PRIORITY_2 | BG_TILE_BASE(FG_SLOT_VIS) | BG_MAP_BASE(MAP_SLOT);
    REG_BG0CNT_SUB = BG_32x32 | BG_COLOR_16 | BG_PRIORITY_3 | BG_TILE_BASE(BG_SLOT_VIS) | BG_MAP_BASE(MAP_SLOT);
    REG_BG1CNT_SUB = BG_32x32 | BG_COLOR_16 | BG_PRIORITY_2 | BG_TILE_BASE(FG_SLOT_VIS) | BG_MAP_BASE(MAP_SLOT);

    /* Populate tile maps with tile indices. Entry i refers to tile i with no
       flipping and palette 0, so tile memory behaves like a linear
       framebuffer of tiles. The map is addressed through MAP_SLOT, the same
       slot the control registers above were given, so the two cannot drift
       apart. */
    u16 *main_map = BG_MAP_RAM(MAP_SLOT);
    u16 *sub_map  = BG_MAP_RAM_SUB(MAP_SLOT);
    for (int i = 0; i < TILES_SIZE; i++) {
        main_map[i] = i;
        sub_map[i]  = i;
    }
}


// Latch the first half of a palette write into scr->palette_write using the
// SET_HIGH macro (presumably the upper byte of the 16-bit value — confirm
// against the macro definition). No palette entry changes here; that happens
// in set_palette_low().
void set_palette_high(ScreenDevice *scr, u8 high) {
    SET_HIGH(scr->palette_write, high);
}
// Latch the second half of a palette write, then apply the completed value.
//
// scr->palette_write is decoded as four nibbles, from the top down:
// palette index, red, green, blue. Each 4-bit channel is shifted into the
// 5-bit RGB15 range (giving even values 0..30). The colour is stored in the
// device palette and, if the device is bound to a screen, in that screen's
// pending palette as well.
void set_palette_low(ScreenDevice *scr, u8 low) {
    SET_LOW(scr->palette_write, low);

    const u8 index = (scr->palette_write >> 12) & 0x0f;
    const u8 red   = (scr->palette_write >>  7) & 0x1e;
    const u8 green = (scr->palette_write >>  3) & 0x1e;
    const u8 blue  = (scr->palette_write <<  1) & 0x1e;

    scr->palette[index] = RGB15(red, green, blue);
    if (!scr->nds) {
        return;
    }
    scr->nds->pal[index] = RGB15(red, green, blue);
}

// Append one byte of sprite data to the 16-byte ring buffer and invalidate
// the decoded-sprite cache so the next prepare_*_sprite() call re-decodes.
void push_sprite(SpriteBuffer *b, u8 row) {
    b->cached = FALSE;
    b->mem[b->p] = row;
    b->p = (b->p + 1) % 16;   // advance the write position, wrapping at 16
}

// Decode the most recent 8 bytes of the sprite ring buffer into the 8x8
// b->sprite array, one 0/1 value per pixel. The result is cached, keyed on
// the full draw byte; push_sprite() clears the cache.
//
// The low three bits of `draw` select the orientation:
//   bit 0  read each byte LSB-first instead of MSB-first
//   bit 1  read the bytes newest-first instead of oldest-first
//   bit 2  swap the two sprite indices when storing (transpose)
void prepare_1bit_sprite(SpriteBuffer *b, u8 draw) {
    if (b->cached && draw == b->draw) {
        return;
    }

    const bool lsb_first    = (draw & 0x01) != 0;
    const bool newest_first = (draw & 0x02) != 0;
    const bool transpose    = (draw & 0x04) != 0;

    for (u8 row = 0; row < 8; row++) {
        // Ring-buffer position of this row's byte. The arithmetic is done
        // modulo 256 and then reduced modulo 16, which stays correct when
        // the subtraction wraps below zero.
        const u8 slot = newest_first ? (u8)(b->p - 1 - row)
                                     : (u8)(b->p + 8 + row);
        const u8 bits = b->mem[slot % 16];

        for (u8 col = 0; col < 8; col++) {
            const u8 shift = lsb_first ? col : 7 - col;
            const u8 pixel = bits >> shift & 1;
            if (transpose) { b->sprite[col][row] = pixel; }
            else           { b->sprite[row][col] = pixel; }
        }
    }

    b->draw = draw;
    b->cached = TRUE;
}

// Decode all 16 bytes of the sprite ring buffer into the 8x8 b->sprite
// array as 2-bit values (0..3). The most recent 8 bytes supply bit 0 of each
// pixel and the 8 bytes before them supply bit 1. The result is cached,
// keyed on the full draw byte; push_sprite() clears the cache.
//
// The low three bits of `draw` select the orientation:
//   bit 0  read each byte LSB-first instead of MSB-first
//   bit 1  read the bytes of each plane newest-first instead of oldest-first
//   bit 2  swap the two sprite indices when storing (transpose)
void prepare_2bit_sprite(SpriteBuffer *b, u8 draw) {
    if (b->cached && draw == b->draw) {
        return;
    }

    const bool lsb_first    = (draw & 0x01) != 0;
    const bool newest_first = (draw & 0x02) != 0;
    const bool transpose    = (draw & 0x04) != 0;

    for (u8 row = 0; row < 8; row++) {
        // The high-plane byte always sits 8 slots after the low-plane byte
        // in the 16-entry ring. Arithmetic is modulo 256, then modulo 16.
        const u8 lo_slot = newest_first ? (u8)(b->p - 1 - row)
                                        : (u8)(b->p + 8 + row);
        const u8 hi_slot = (u8)(lo_slot + 8);
        const u8 lo = b->mem[lo_slot % 16];
        const u8 hi = b->mem[hi_slot % 16];

        for (u8 col = 0; col < 8; col++) {
            const u8 shift = lsb_first ? col : 7 - col;
            const u8 pixel = (lo >> shift & 1) | (hi >> shift & 1) << 1;
            if (transpose) { b->sprite[col][row] = pixel; }
            else           { b->sprite[row][col] = pixel; }
        }
    }

    b->draw = draw;
    b->cached = TRUE;
}


// ---------------------------------------------------------------------------

// Set one pixel of a tiled 4bpp layer to a palette index.
//
//   layer   start of the layer's tile memory, addressed as 16-bit words
//   x, y    pixel coordinates; anything outside PIXELS_WIDTH x PIXELS_HEIGHT
//           is silently ignored (this also rejects "negative" coordinates,
//           which arrive here as large unsigned values)
//   colour  palette index; only the low 4 bits are used
void draw_pixel(u16 *layer, u16 x, u16 y, u8 colour) {
    if (x >= PIXELS_WIDTH || y >= PIXELS_HEIGHT) {
        return;
    }

    // Word offset of the pixel. A tile is 16 words (8 rows of 2 words) and
    // a row of tiles is 32 tiles, so:
    //   (x >> 2 & 1)       which half of the 8-pixel tile row
    //   (x << 1 & 0xfff0)  tile column * 16
    //   (y << 1 & 0x000f)  row within the tile * 2
    //   (y << 6 & 0xfe00)  tile row * 32 * 16
    const u32 addr =
        (x >> 2 & 0x0001) + (x << 1 & 0xfff0) +
        (y << 1 & 0x000f) + (y << 6 & 0xfe00);

    // Four pixels per word, leftmost pixel in the lowest nibble.
    const u16 shift = (x & 0x3) << 2;
    const u16 mask  = 0xf << shift;

    // Mask the colour so that an out-of-range value cannot spill into the
    // neighbouring pixels that share this word.
    layer[addr] = (layer[addr] & ~mask) | ((colour & 0xf) << shift);
}

// Fill an entire layer (TILES_MEM bytes) with a single palette index.
// Only the low 4 bits of `colour` are used.
void fill_layer(u16 *layer, u8 colour) {
    // Replicate the 4-bit index into all eight nibbles of a 32-bit word.
    // This is done with unsigned arithmetic: shifting a promoted `int` left
    // by 24 is undefined once the top bit would be set, which is the case
    // for every colour from 8 upwards.
    const u32 nibble = colour & 0xf;
    const u32 word   = nibble * 0x11111111u;
    dmaFillWords(word, layer, TILES_MEM);
}

// Clear both back-buffer layers of a screen to palette index 0. The visible
// layers are untouched until the next flip_buffer(). A NULL screen is a
// no-op.
void erase_screen(Screen *nds) {
    if (!nds) {
        return;
    }
    dmaFillWords(0, nds->bg, TILES_MEM);
    dmaFillWords(0, nds->fg, TILES_MEM);
}

// Present the back buffer: copy both back-buffer layers over the visible
// layers, then copy the 16 pending palette entries into the visible palette.
// A NULL screen is a no-op.
void flip_buffer(Screen *nds) {
    if (!nds) {
        return;
    }

    // First argument is the DMA channel to use for the copy.
    dmaCopyWords(0, nds->bg, nds->bgv, TILES_MEM);
    dmaCopyWords(0, nds->fg, nds->fgv, TILES_MEM);

    int slot = 0;
    while (slot < 16) {
        nds->palv[slot] = nds->pal[slot];
        slot++;
    }
}

// Blank what is currently on display: set all 16 visible palette entries to
// black, then zero both visible layers. The back buffers and the pending
// palette are left alone. A NULL screen is a no-op.
void black_screen(Screen *nds) {
    if (!nds) {
        return;
    }

    int slot = 0;
    while (slot < 16) {
        nds->palv[slot] = RGB15(0,0,0);
        slot++;
    }

    dmaFillWords(0, nds->bgv, TILES_MEM);
    dmaFillWords(0, nds->fgv, TILES_MEM);
}
// ---------------------------------------------------------------------------

// Execute one draw command.
//
// Bit 7 of `draw` selects the target layer (set: foreground, clear:
// background) and bits 4-6 select the operation. The whole byte is passed
// on to the operation, which interprets the remaining bits itself.
//
// If the device is bound to a screen the operation is carried out on that
// screen's back buffer and the device is marked dirty. In every case the
// current cursor position is then recorded as the previous position.
void draw_dispatch(ScreenDevice *scr, u8 draw) {
    if (scr->nds) {
        u16 *layer = (draw & 0x80) ? scr->nds->fg : scr->nds->bg;

        switch (draw >> 4 & 0x7) {
        case 0x0:
            scr_draw_pixel(scr, layer, draw);
            break;
        case 0x1:
        case 0x3:
            scr_draw_sprite(scr, layer, draw);
            break;
        case 0x2:
            scr_fill_layer(scr, layer, draw);
            break;
        case 0x4:
        case 0x5:
            scr_draw_line(scr, layer, draw);
            break;
        case 0x6:
        case 0x7:
            scr_draw_rect(scr, layer, draw);
            break;
        }

        scr->dirty = true;
    }

    scr->px = scr->x;
    scr->py = scr->y;
}

// Draw command: plot a single pixel at the cursor. The palette index is the
// low nibble of the draw byte.
void scr_draw_pixel(ScreenDevice *scr, u16 *layer, u8 draw) {
    const u8 colour = draw & 0xf;
    draw_pixel(layer, scr->x, scr->y, colour);
}

// Draw command: flood the whole layer with one palette index, taken from
// the low nibble of the draw byte. `scr` is unused; it is accepted only so
// that every draw operation has the same signature.
void scr_fill_layer(ScreenDevice *scr, u16 *layer, u8 draw) {
    const u8 colour = draw & 0xf;
    fill_layer(layer, colour);
}

// Draw command: blit the buffered 8x8 sprite with its top-left corner at the
// cursor.
//
//   draw & 0x20  decode the sprite as 2-bit (otherwise 1-bit)
//   draw & 0x08  treat sprite value 0 as transparent (otherwise it is drawn)
//   draw & 0x07  orientation, handled by the prepare_*_sprite() routines
//
// Sprite values index a four-entry colour table unpacked from scr->colours,
// highest nibble first.
void scr_draw_sprite(ScreenDevice *scr, u16 *layer, u8 draw) {
    if (draw & 0x20) {
        prepare_2bit_sprite(&scr->sprite, draw);
    } else {
        prepare_1bit_sprite(&scr->sprite, draw);
    }

    const u8 lut[4] = {
        scr->colours >> 12 & 0x000f,
        scr->colours >>  8 & 0x000f,
        scr->colours >>  4 & 0x000f,
        scr->colours       & 0x000f,
    };
    const bool transparent = (draw & 0x08) != 0;

    for (u8 row = 0; row < 8; row++) {
        for (u8 col = 0; col < 8; col++) {
            const u8 index = scr->sprite.sprite[row][col];
            if (transparent && index == 0) {
                continue;
            }
            draw_pixel(layer, scr->x + col, scr->y + row, lut[index]);
        }
    }
}

// Draw command: draw a line from the cursor to the previous cursor position
// (both end points included) using Bresenham's algorithm.
//
//   draw & 0x10  textured: each pixel takes its colour from the buffered
//                1-bit sprite, tiled across the layer every 8 pixels.
//                Sprite value 1 uses nibble 2 of scr->colours; value 0 uses
//                nibble 3, or is skipped when draw & 0x08 is set.
//   otherwise    solid: every pixel uses the low nibble of `draw`.
//
// Coordinates are treated as signed 16-bit, so a line may start or end
// off-screen; draw_pixel() discards the pixels that fall outside the layer.
void scr_draw_line(ScreenDevice *scr, u16 *layer, u8 draw) {
    s16 x = (s16) scr->x;
    s16 y = (s16) scr->y;
    const s16 x_end = (s16) scr->px;
    const s16 y_end = (s16) scr->py;

    const s32 dx =  abs(x_end - x);
    const s32 dy = -abs(y_end - y);
    const s16 sx = x < x_end ? 1 : -1;
    const s16 sy = y < y_end ? 1 : -1;
    s32 err = dx + dy;

    const bool textured = (draw & 0x10) != 0;
    const bool opaque   = !(draw & 0x08);
    u8 c1 = draw & 0xf;   // solid colour, or texture "1" colour
    u8 c0 = 0;            // texture "0" colour (textured mode only)
    if (textured) {
        prepare_1bit_sprite(&scr->sprite, draw);
        c1 = scr->colours >>  8 & 0xf;
        c0 = scr->colours >> 12 & 0xf;
    }

    for (;;) {
        // The texture is indexed with `& 7` rather than `% 8`: x and y are
        // signed and may be negative, and a negative remainder would read
        // outside the 8x8 sprite array.
        if (!textured || scr->sprite.sprite[y & 7][x & 7]) {
            draw_pixel(layer, x, y, c1);
        } else if (opaque) {
            draw_pixel(layer, x, y, c0);
        }

        if (x == x_end && y == y_end) {
            return;
        }

        // Doubled with a multiply: the error term is frequently negative
        // and left-shifting a negative value is undefined.
        const s32 e2 = err * 2;
        if (e2 >= dy) { err += dy; x += sx; }
        if (e2 <= dx) { err += dx; y += sy; }
    }
}

// True when the span between a and b (in either order) lies wholly off one
// edge of an axis that is `size` pixels long. Values above 0x7fff count as
// negative. A span with one end below zero and the other past the far edge
// crosses the whole axis and is therefore NOT off-screen.
static bool rect_span_offscreen(u16 a, u16 b, u16 size) {
    return a >= size && b >= size && (a > 0x7fff) == (b > 0x7fff);
}

// Draw command: fill the rectangle whose opposite corners are the cursor and
// the previous cursor position (edges included), clipped to the layer.
//
//   draw & 0x10  textured: each pixel takes its colour from the buffered
//                1-bit sprite, tiled across the layer every 8 pixels.
//                Sprite value 1 uses nibble 2 of scr->colours; value 0 uses
//                nibble 3, or is skipped when draw & 0x08 is set.
//   otherwise    solid: every pixel uses the low nibble of `draw`.
void scr_draw_rect(ScreenDevice *scr, u16 *layer, u8 draw) {
    // Kept as macros under their original names. The swap temporary is u16
    // to match the values being swapped, and all macro arguments are
    // parenthesized.
    #define SWAP(x,y) {u16 temp=(x); (x)=(y); (y)=temp;}
    #define CLAMP(v,m) ((v)>0x7fff ? 0 : (v)>(m) ? (m) : (v))

    // Clamping alone would pull a rectangle that is entirely off-screen
    // onto the nearest edge and draw it as a one-pixel sliver, so reject
    // that case first.
    if (rect_span_offscreen(scr->px, scr->x, PIXELS_WIDTH) ||
        rect_span_offscreen(scr->py, scr->y, PIXELS_HEIGHT)) {
        return;
    }

    // Get bounding box. Negative coordinates clamp to 0, large ones to the
    // last row/column.
    u16 l = CLAMP(scr->px, PIXELS_WIDTH -1);
    u16 r = CLAMP(scr->x , PIXELS_WIDTH -1);
    u16 t = CLAMP(scr->py, PIXELS_HEIGHT-1);
    u16 b = CLAMP(scr->y , PIXELS_HEIGHT-1);
    if (l>r) SWAP(l,r);
    if (t>b) SWAP(t,b);

    if (draw & 0x10) {
    // Draw 1-bit textured rectangle.
        prepare_1bit_sprite(&scr->sprite, draw);
        const u8 c1 = scr->colours >>  8 & 0xf;
        const u8 c0 = scr->colours >> 12 & 0xf;
        const bool opaque = !(draw & 0x08);
        for (u16 x=l; x<r+1; x++) {
            for (u16 y=t; y<b+1; y++) {
                if (scr->sprite.sprite[y%8][x%8]) { draw_pixel(layer, x, y, c1); }
                else if (opaque)                  { draw_pixel(layer, x, y, c0); }
            }
        }
    } else {
    // Draw solid rectangle.
        const u8 colour = draw & 0xf;
        for (u16 x=l; x<r+1; x++) {
            for (u16 y=t; y<b+1; y++) {
                draw_pixel(layer, x, y, colour);
            }
        }
    }
}

// Move the cursor along one axis.
//
//   move & 0x3f  distance, 0..63
//   move & 0x40  axis: set moves y, clear moves x
//   move & 0x80  direction: set subtracts the distance, clear adds it
void move_cursor(ScreenDevice *scr, u8 move) {
    const u8   distance = move & 0x3f;
    const bool vertical = (move & 0x40) != 0;
    const bool backward = (move & 0x80) != 0;

    if (vertical) {
        if (backward) { scr->y -= distance; }
        else          { scr->y += distance; }
    } else {
        if (backward) { scr->x -= distance; }
        else          { scr->x += distance; }
    }
}