/*****************************************************************************/
//
// Module	   : cae_pers.vpp
// Revision	   : $Revision: 1.34 $
// Last Modified On: $Date: 2011/11/21 16:27:46 $
// Last Modified By: $Author: mruff $
//
//-----------------------------------------------------------------------------
//
// Original Author : gedwards
// Created On      : Wed Oct 10 09:26:08 2007
//
//-----------------------------------------------------------------------------
//
// Description     : Sample PDK Vector Add Personality
//
//                   Top-level of vadd personality.  For a complete list of 
//                   optional ports, see 
//                   /opt/convey/pdk/<rev>/<platform>/doc/cae_pers.v
//
//-----------------------------------------------------------------------------
//
// Copyright (c) 2007-2011 : created by Convey Computer Corp. This model is the
// confidential and proprietary property of Convey Computer Corp.
//
/*****************************************************************************/
/* $Id: cae_pers.vpp,v 1.34 2011/11/21 16:27:46 mruff Exp $ */

`timescale 1 ns / 1 ps

`include "pdk_fpga_defines.vh"

(* keep_hierarchy = "true" *)
module cae_pers (
   input		clk_csr,
   input		clk,
   input		clk2x,
   input		i_reset,
   input		i_csr_reset_n,
   input  [1:0]		i_aeid,

   input		ppll_reset,
   output		ppll_locked,
   output		clk_per,

   //
   // Dispatch Interface
   //
   input  [31:0]	cae_inst,
   input  [63:0]	cae_data,
   input		cae_inst_vld,

   output [17:0]	cae_aeg_cnt,
   output [15:0]	cae_exception,
   output [63:0]	cae_ret_data,
   output		cae_ret_data_vld,
   output		cae_idle,
   output		cae_stall,

   //
   // MC Interface(s)
   //
   output		mc0_req_ld_e, mc0_req_ld_o,
   output		mc0_req_st_e, mc0_req_st_o,
   output [1:0]		mc0_req_size_e, mc0_req_size_o,
   output [47:0]	mc0_req_vadr_e, mc0_req_vadr_o,
   output [63:0]	mc0_req_wrd_rdctl_e, mc0_req_wrd_rdctl_o,
   output		mc0_rsp_stall_e, mc0_rsp_stall_o,
   input		mc0_rd_rq_stall_e, mc0_rd_rq_stall_o,
   input		mc0_wr_rq_stall_e, mc0_wr_rq_stall_o,
   input  [63:0]	mc0_rsp_data_e, mc0_rsp_data_o,
   input		mc0_rsp_push_e, mc0_rsp_push_o,
   input  [31:0]	mc0_rsp_rdctl_e, mc0_rsp_rdctl_o,
   output		mc1_req_ld_e, mc1_req_ld_o,
   output		mc1_req_st_e, mc1_req_st_o,
   output [1:0]		mc1_req_size_e, mc1_req_size_o,
   output [47:0]	mc1_req_vadr_e, mc1_req_vadr_o,
   output [63:0]	mc1_req_wrd_rdctl_e, mc1_req_wrd_rdctl_o,
   output		mc1_rsp_stall_e, mc1_rsp_stall_o,
   input		mc1_rd_rq_stall_e, mc1_rd_rq_stall_o,
   input		mc1_wr_rq_stall_e, mc1_wr_rq_stall_o,
   input  [63:0]	mc1_rsp_data_e, mc1_rsp_data_o,
   input		mc1_rsp_push_e, mc1_rsp_push_o,
   input  [31:0]	mc1_rsp_rdctl_e, mc1_rsp_rdctl_o,
   output		mc2_req_ld_e, mc2_req_ld_o,
   output		mc2_req_st_e, mc2_req_st_o,
   output [1:0]		mc2_req_size_e, mc2_req_size_o,
   output [47:0]	mc2_req_vadr_e, mc2_req_vadr_o,
   output [63:0]	mc2_req_wrd_rdctl_e, mc2_req_wrd_rdctl_o,
   output		mc2_rsp_stall_e, mc2_rsp_stall_o,
   input		mc2_rd_rq_stall_e, mc2_rd_rq_stall_o,
   input		mc2_wr_rq_stall_e, mc2_wr_rq_stall_o,
   input  [63:0]	mc2_rsp_data_e, mc2_rsp_data_o,
   input		mc2_rsp_push_e, mc2_rsp_push_o,
   input  [31:0]	mc2_rsp_rdctl_e, mc2_rsp_rdctl_o,
   output		mc3_req_ld_e, mc3_req_ld_o,
   output		mc3_req_st_e, mc3_req_st_o,
   output [1:0]		mc3_req_size_e, mc3_req_size_o,
   output [47:0]	mc3_req_vadr_e, mc3_req_vadr_o,
   output [63:0]	mc3_req_wrd_rdctl_e, mc3_req_wrd_rdctl_o,
   output		mc3_rsp_stall_e, mc3_rsp_stall_o,
   input		mc3_rd_rq_stall_e, mc3_rd_rq_stall_o,
   input		mc3_wr_rq_stall_e, mc3_wr_rq_stall_o,
   input  [63:0]	mc3_rsp_data_e, mc3_rsp_data_o,
   input		mc3_rsp_push_e, mc3_rsp_push_o,
   input  [31:0]	mc3_rsp_rdctl_e, mc3_rsp_rdctl_o,
   output		mc4_req_ld_e, mc4_req_ld_o,
   output		mc4_req_st_e, mc4_req_st_o,
   output [1:0]		mc4_req_size_e, mc4_req_size_o,
   output [47:0]	mc4_req_vadr_e, mc4_req_vadr_o,
   output [63:0]	mc4_req_wrd_rdctl_e, mc4_req_wrd_rdctl_o,
   output		mc4_rsp_stall_e, mc4_rsp_stall_o,
   input		mc4_rd_rq_stall_e, mc4_rd_rq_stall_o,
   input		mc4_wr_rq_stall_e, mc4_wr_rq_stall_o,
   input  [63:0]	mc4_rsp_data_e, mc4_rsp_data_o,
   input		mc4_rsp_push_e, mc4_rsp_push_o,
   input  [31:0]	mc4_rsp_rdctl_e, mc4_rsp_rdctl_o,
   output		mc5_req_ld_e, mc5_req_ld_o,
   output		mc5_req_st_e, mc5_req_st_o,
   output [1:0]		mc5_req_size_e, mc5_req_size_o,
   output [47:0]	mc5_req_vadr_e, mc5_req_vadr_o,
   output [63:0]	mc5_req_wrd_rdctl_e, mc5_req_wrd_rdctl_o,
   output		mc5_rsp_stall_e, mc5_rsp_stall_o,
   input		mc5_rd_rq_stall_e, mc5_rd_rq_stall_o,
   input		mc5_wr_rq_stall_e, mc5_wr_rq_stall_o,
   input  [63:0]	mc5_rsp_data_e, mc5_rsp_data_o,
   input		mc5_rsp_push_e, mc5_rsp_push_o,
   input  [31:0]	mc5_rsp_rdctl_e, mc5_rsp_rdctl_o,
   output		mc6_req_ld_e, mc6_req_ld_o,
   output		mc6_req_st_e, mc6_req_st_o,
   output [1:0]		mc6_req_size_e, mc6_req_size_o,
   output [47:0]	mc6_req_vadr_e, mc6_req_vadr_o,
   output [63:0]	mc6_req_wrd_rdctl_e, mc6_req_wrd_rdctl_o,
   output		mc6_rsp_stall_e, mc6_rsp_stall_o,
   input		mc6_rd_rq_stall_e, mc6_rd_rq_stall_o,
   input		mc6_wr_rq_stall_e, mc6_wr_rq_stall_o,
   input  [63:0]	mc6_rsp_data_e, mc6_rsp_data_o,
   input		mc6_rsp_push_e, mc6_rsp_push_o,
   input  [31:0]	mc6_rsp_rdctl_e, mc6_rsp_rdctl_o,
   output		mc7_req_ld_e, mc7_req_ld_o,
   output		mc7_req_st_e, mc7_req_st_o,
   output [1:0]		mc7_req_size_e, mc7_req_size_o,
   output [47:0]	mc7_req_vadr_e, mc7_req_vadr_o,
   output [63:0]	mc7_req_wrd_rdctl_e, mc7_req_wrd_rdctl_o,
   output		mc7_rsp_stall_e, mc7_rsp_stall_o,
   input		mc7_rd_rq_stall_e, mc7_rd_rq_stall_o,
   input		mc7_wr_rq_stall_e, mc7_wr_rq_stall_o,
   input  [63:0]	mc7_rsp_data_e, mc7_rsp_data_o,
   input		mc7_rsp_push_e, mc7_rsp_push_o,
   input  [31:0]	mc7_rsp_rdctl_e, mc7_rsp_rdctl_o,

   //
   // Write flush
   //
   output		mc0_req_flush_e, mc0_req_flush_o,
   input		mc0_rsp_flush_cmplt_e, mc0_rsp_flush_cmplt_o,
   output		mc1_req_flush_e, mc1_req_flush_o,
   input		mc1_rsp_flush_cmplt_e, mc1_rsp_flush_cmplt_o,
   output		mc2_req_flush_e, mc2_req_flush_o,
   input		mc2_rsp_flush_cmplt_e, mc2_rsp_flush_cmplt_o,
   output		mc3_req_flush_e, mc3_req_flush_o,
   input		mc3_rsp_flush_cmplt_e, mc3_rsp_flush_cmplt_o,
   output		mc4_req_flush_e, mc4_req_flush_o,
   input		mc4_rsp_flush_cmplt_e, mc4_rsp_flush_cmplt_o,
   output		mc5_req_flush_e, mc5_req_flush_o,
   input		mc5_rsp_flush_cmplt_e, mc5_rsp_flush_cmplt_o,
   output		mc6_req_flush_e, mc6_req_flush_o,
   input		mc6_rsp_flush_cmplt_e, mc6_rsp_flush_cmplt_o,
   output		mc7_req_flush_e, mc7_req_flush_o,
   input		mc7_rsp_flush_cmplt_e, mc7_rsp_flush_cmplt_o,

   //
   // Management/Debug Interface
   //
   input  [3:0]		cae_ring_ctl_in,
   input  [15:0]	cae_ring_data_in,
   output [3:0]		cae_ring_ctl_out,
   output [15:0]	cae_ring_data_out,

   input		csr_31_31_intlv_dis
);

`include "pdk_fpga_param.vh"

   //
   // Local clock generation
   //
   // The cae_clock instance receives the core clock and both reset inputs,
   // and drives the module's clk_per / ppll_locked outputs plus the local
   // reset_per net (consumed by the Controller instance).
   //
   (* KEEP = "true" *)
   wire reset_per;

   cae_clock clock (
      // Driven by cae_clock
      .reset_per   (reset_per),
      .ppll_locked (ppll_locked),
      .clk_per     (clk_per),

      // Supplied to cae_clock
      .ppll_reset  (ppll_reset),
      .i_reset     (i_reset),
      .clk         (clk)
   );


   //
   // Instruction decode
   //
   // instdec receives the raw dispatch inputs (cae_inst, cae_data,
   // cae_inst_vld) and drives the decoded signals used by the AEG and
   // dispatch logic further down in this module.
   //
   // Every decode output is declared explicitly.  Previously the four
   // single-bit signals only existed as implicit nets created by the port
   // connections below, which breaks under `default_nettype none and hides
   // width mistakes.
   //
   wire		inst_val;	// read by the dispatch and error logic
   wire		inst_aeg_wr;	// AEG write strobe
   wire		inst_aeg_rd;	// AEG read strobe
   wire		err_unimpl;	// feeds r_err_unimpl / cae_exception[0]
   wire [4:0]	inst_caep;
   wire [17:0]	inst_aeg_idx;
   instdec dec (
      .cae_inst(cae_inst),
      .cae_data(cae_data),
      .cae_inst_vld(cae_inst_vld),

      .inst_val(inst_val),
      .inst_caep(inst_caep),
      .inst_aeg_wr(inst_aeg_wr),
      .inst_aeg_rd(inst_aeg_rd),
      .inst_aeg_idx(inst_aeg_idx),
      .err_unimpl(err_unimpl)
   );


   //**************************************************************************
   //			   PERSONALITY SPECIFIC LOGIC
   //**************************************************************************

   //
   // AEG[0..NA-1] Registers
   //
   // NA is reported to the dispatch interface through cae_aeg_cnt and sizes
   // the w_aeg read-back array; NB is the number of low index bits used to
   // select an entry of that array.
   //
   // The former r_st_tot, r_sum and cae_csr_scratch declarations were never
   // driven or read anywhere in this module and have been removed.
   //
   localparam NA = 5;
   localparam NB = 3;		// Number of bits to represent NAEG

   assign cae_aeg_cnt = NA;

   // Registered value of each AEG, indexed by AEG number
   wire [63:0]	w_aeg[NA-1:0];

   // Constant taken from the MC_XBAR parameter; exposed read-only via AEG 4
   wire xbar_enabled = MC_XBAR;

   // One 64-bit register per AEG index.
   //
   //   * On a write strobe whose index matches, the register loads cae_data.
   //   * Otherwise it reloads c_aeg: its own value for ordinary registers,
   //     or {63'h0, xbar_enabled} for index 4, so a write to AEG 4 is
   //     overwritten again on the following clock.
   //
   // The write enable compares the FULL inst_aeg_idx against g.  Comparing
   // only the low NB bits let an out-of-range index (for example 8, whose
   // low three bits are 0) alias onto a valid register and corrupt it even
   // though the same access raises r_err_aegidx.
   genvar g;
   generate for (g=0; g<NA; g=g+1) begin : g0
      reg [63:0] c_aeg, r_aeg;

      always @* begin
	 case (g)
	    4: c_aeg = {63'h0, xbar_enabled};
	 default: c_aeg = r_aeg;
	 endcase
      end

      wire c_aeg_we = inst_aeg_wr && (inst_aeg_idx == g);

      always @(posedge clk) begin
	 if (c_aeg_we)
	    r_aeg <= cae_data;
	 else
	    r_aeg <= c_aeg;
      end
      assign w_aeg[g] = r_aeg;
   end endgenerate

   //
   // AEG read-back and instruction error flags, all registered on clk.
   //
   //   r_ret_val    - AEG read with an in-range index
   //   r_ret_data   - AEG selected by the low NB index bits
   //   r_err_aegidx - AEG read or write with an index >= NA
   //   r_err_unimpl - decoder-reported error, or a valid instruction whose
   //                  caep field is not zero
   //
   reg [63:0]	r_ret_data;
   reg		r_ret_val, r_err_unimpl, r_err_aegidx;

   wire c_val_aegidx = inst_aeg_idx < NA;

   always @(posedge clk) begin
      r_err_unimpl <= (inst_val && inst_caep !== 'd0) || err_unimpl;
      r_err_aegidx <= !c_val_aegidx && (inst_aeg_rd || inst_aeg_wr);
      r_ret_data   <= w_aeg[inst_aeg_idx[NB-1:0]];
      r_ret_val    <= c_val_aegidx && inst_aeg_rd;
   end

   assign cae_exception[1:0] = {r_err_aegidx, r_err_unimpl};
   assign cae_ret_data       = r_ret_data;
   assign cae_ret_data_vld   = r_ret_val;

   //
   // Dispatch logic
   //
   // c_caep00 starts the operation: a valid instruction with caep == 0 while
   // csr_31_31_intlv_dis is set.  r_caep00 is its one-cycle delayed copy.
   //
   // NOTE(review): these flops are clocked by clk_per while the AEG logic
   // above uses clk - confirm the two are the same clock in this build or
   // add proper clock-domain crossing.
   //

   // Completion flag driven by the Controller instance.  Declared here so
   // that it exists before its first use in the cae_idle assignment; it was
   // previously only an implicit net created further down the file.
   wire done;

   // This signal was referenced below without ever being declared or driven.
   // It now flags a load base (AEG 0) or store base (AEG 1) whose low three
   // address bits are not zero.
   // NOTE(review): assumes 8-byte alignment is the requirement - confirm
   // against the memory request size the design actually issues.
   wire c_unaligned_addr = (|w_aeg[0][2:0]) || (|w_aeg[1][2:0]);

   wire c_caep00 = inst_val && inst_caep == 5'd0 && csr_31_31_intlv_dis;

   // (unused r_sum_ovrflw_vec / r_res_ovrflw_vec / r_sum_vld_vec removed)
   reg		r_caep00, r_idle, r_unaligned_addr, r_err_intlv;

   always @(posedge clk_per) begin
      r_caep00 <= c_caep00;
      r_idle   <= cae_idle;

      r_unaligned_addr <= inst_val && inst_caep == 5'd0 && c_unaligned_addr;
      r_err_intlv      <= inst_val && !csr_31_31_intlv_dis;
   end

   // Exception bits [3:2] = {unaligned address, interleave error};
   // bits [15:4] are tied low.
   assign cae_exception[15:2] = {12'b0,
				 r_unaligned_addr,
				 r_err_intlv};


   // Idle once the start pulse has passed and the Controller reports done;
   // stall the dispatcher for the two cycles around the start pulse.
   assign cae_idle  = !r_caep00 && done;
   assign cae_stall = c_caep00 || r_caep00;

   //
   // Management Interface (Control/Status)
   //
   // No local CSRs are attached: the ring control and data inputs are
   // forwarded unchanged to the ring outputs.
   //
   assign {cae_ring_ctl_out, cae_ring_data_out} =
	  {cae_ring_ctl_in,  cae_ring_data_in};

   //
   // Controller
   //
   wire [47:0] load_address, store_address;
   wire        store_valid;

   // shift is high only when every odd-side response push (mc0..mc7) is
   // asserted in the same cycle.
   // NOTE(review): the even-side (_e) pushes are not part of this term -
   // confirm that is intended.
   wire shift = &{mc0_rsp_push_o, mc1_rsp_push_o,
                  mc2_rsp_push_o, mc3_rsp_push_o,
                  mc4_rsp_push_o, mc5_rsp_push_o,
                  mc6_rsp_push_o, mc7_rsp_push_o};

   // Operation parameters taken from the AEG registers:
   //   AEG0 = load base address, AEG1 = store base address (low 48 bits),
   //   AEG2 = rows, AEG3 = columns
   wire [47:0] base_load_address  = w_aeg[0][47:0];
   wire [47:0] base_store_address = w_aeg[1][47:0];
   wire [63:0] rows               = w_aeg[2];
   wire [63:0] columns            = w_aeg[3];

   // Connections are positional - the order below must match the port
   // order of the Controller module exactly.
   Controller CNTRL (
        // Inputs
        i_aeid,
        base_load_address,
        base_store_address,
        rows,
        columns,
        shift,
        reset_per,
        clk_per,
        r_idle,
        r_caep00,
        // Outputs
        load_address,
        store_address,
        store_valid,
        done
   );
   
   //
   // Map BigShifters, MC, Controller, and Sobels
   //
   wire r0c0, r1c0, r2c0;	// not connected to anything yet

   // BigShifter 0 outputs: eight columns (c1..c8), three rows each, one
   // byte per entry.  The port list previously held VHDL entity
   // declarations pasted verbatim, which is not legal Verilog; it is now a
   // set of declared nets connected positionally in that same order.
   wire [7:0] bs0_c1_row1, bs0_c1_row2, bs0_c1_row3;
   wire [7:0] bs0_c2_row1, bs0_c2_row2, bs0_c2_row3;
   wire [7:0] bs0_c3_row1, bs0_c3_row2, bs0_c3_row3;
   wire [7:0] bs0_c4_row1, bs0_c4_row2, bs0_c4_row3;
   wire [7:0] bs0_c5_row1, bs0_c5_row2, bs0_c5_row3;
   wire [7:0] bs0_c6_row1, bs0_c6_row2, bs0_c6_row3;
   wire [7:0] bs0_c7_row1, bs0_c7_row2, bs0_c7_row3;
   wire [7:0] bs0_c8_row1, bs0_c8_row2, bs0_c8_row3;

   // NOTE(review): BS0 is clocked by clk while the Controller uses clk_per;
   // confirm which clock the shifter is meant to run on.
   // NOTE(review): positional order is assumed to be data, shift, clock,
   // then c1..c8 row1..row3 as in the pasted VHDL - verify against the
   // BigShifter entity.
   BigShifter BS0 (
        mc0_rsp_data_o, shift, clk,
        bs0_c1_row1, bs0_c1_row2, bs0_c1_row3,
        bs0_c2_row1, bs0_c2_row2, bs0_c2_row3,
        bs0_c3_row1, bs0_c3_row2, bs0_c3_row3,
        bs0_c4_row1, bs0_c4_row2, bs0_c4_row3,
        bs0_c5_row1, bs0_c5_row2, bs0_c5_row3,
        bs0_c6_row1, bs0_c6_row2, bs0_c6_row3,
        bs0_c7_row1, bs0_c7_row2, bs0_c7_row3,
        bs0_c8_row1, bs0_c8_row2, bs0_c8_row3
   );
   
   // ------------------------------------------------------------------
   // Placeholder instances.  Every port list below is still empty, so
   // none of these instances is connected to anything yet.
   // NOTE(review): the mc*_req_*, mc*_rsp_stall_* and mc*_req_flush_*
   // outputs of this module are not driven anywhere in the file so far.
   // ------------------------------------------------------------------

   // Remaining BigShifter instances
   BigShifter BS1 ( /* TODO */ );
   BigShifter BS2 ( /* TODO */ );
   BigShifter BS3 ( /* TODO */ );
   BigShifter BS4 ( /* TODO */ );
   BigShifter BS5 ( /* TODO */ );
   BigShifter BS6 ( /* TODO */ );
   BigShifter BS7 ( /* TODO */ );

   // Sobel instances
   Sobel S0 ( /* TODO */ );
   Sobel S1 ( /* TODO */ );

   // ...

   Sobel S60 ( /* TODO */ );
   Sobel S61 ( /* TODO */ );
   
   
endmodule // cae_pers