From bcb463a8d0f9abf8aced8fd8fdd35049985e6b1b Mon Sep 17 00:00:00 2001 From: saji Date: Thu, 9 May 2024 17:31:15 -0500 Subject: [PATCH] wip: 4x clock double-read bram --- groovylight/platform/colorlight_5a_75b_8_0.py | 18 +-- groovylight/soc.py | 46 ++++---- verilog/coordinator.sv | 25 +--- verilog/hub75e.sv | 108 ++++++++++++------ verilog/lineram.v | 4 +- verilog/tb/hub75e_tb.sv | 9 +- 6 files changed, 115 insertions(+), 95 deletions(-) diff --git a/groovylight/platform/colorlight_5a_75b_8_0.py b/groovylight/platform/colorlight_5a_75b_8_0.py index a3dc8d7..5bab51c 100644 --- a/groovylight/platform/colorlight_5a_75b_8_0.py +++ b/groovylight/platform/colorlight_5a_75b_8_0.py @@ -93,10 +93,10 @@ _connectors = [ class _CRG(LiteXModule): def __init__(self, platform, sys_clk_freq, with_reset = False): self.cd_sys = ClockDomain("sys") - self.cd_hub = ClockDomain("hub") - # self.cd_sdram = ClockDomain("sdram") - self.cd_sys2x = ClockDomain() - self.cd_sys2x_ps = ClockDomain() + # self.cd_hub = ClockDomain("hub") + self.cd_sys_ps = ClockDomain("sys_ps") + # self.cd_sys2x = ClockDomain() + # self.cd_sys2x_ps = ClockDomain() # Clk / Rst. clk25 = platform.request("clk25") @@ -110,12 +110,12 @@ class _CRG(LiteXModule): pll.register_clkin(clk25, 25e6) pll.create_clkout(self.cd_sys, sys_clk_freq) # for the sdram - # pll.create_clkout(self.cd_sdram, sys_clk_freq, phase=180) - pll.create_clkout(self.cd_sys2x, 2*sys_clk_freq) - pll.create_clkout(self.cd_sys2x_ps, 2*sys_clk_freq, phase=180) + pll.create_clkout(self.cd_sys_ps, sys_clk_freq, phase=180) + # pll.create_clkout(self.cd_sys2x, 2*sys_clk_freq) + # pll.create_clkout(self.cd_sys2x_ps, 2*sys_clk_freq, phase=180) - sdram_clk = ClockSignal("sys2x_ps") - # sdram_clk = ClockSignal("sdram") + # sdram_clk = ClockSignal("sys2x_ps") + sdram_clk = ClockSignal("sys_ps") self.specials += DDROutput(1,0, platform.request("sdram_clock"), sdram_clk) diff --git a/groovylight/soc.py b/groovylight/soc.py index d8927ee..fe3f0f3 100644 --- a/groovylight/soc.py +++ b/groovylight/soc.py @@ -27,28 +27,28 @@ class GroovySoC(SoCCore): self.submodules += self.crg print(kwargs) - if not self.integrated_main_ram_size: - self.sdrphy = HalfRateGENSDRPHY(platform.request("sdram"), sys_clk_freq) - self.add_sdram("sdram", - phy = self.sdrphy, - module = M12L64322A(sys_clk_freq, "1:2"), - l2_cache_size = kwargs.get("l2_size", 8192), - ) - - - self.submodules.ethphy = LiteEthPHYRGMII( - clock_pads= self.platform.request("eth_clocks", 0), - pads = self.platform.request("eth", 0), - tx_delay = 0e-9, # not sure what this is - ) - self.add_csr("ethphy") - self.add_etherbone(phy=self.ethphy, ip_address="192.168.0.36", mac_address = 0x10e2d5000001, data_width=32) - if use_spi: - from litespi.modules import W25Q32JV as SpiFlashModule - from litespi.opcodes import SpiNorFlashOpCodes - self.mem_map["spiflash"] = 0x20000000 - mod = SpiFlashModule(SpiNorFlashOpCodes.READ_1_1_1) - self.add_spi_flash(mode="1x", module=SpiFlashModule, with_master=False) + # if not self.integrated_main_ram_size: + # self.sdrphy = GENSDRPHY(platform.request("sdram"), sys_clk_freq) + # self.add_sdram("sdram", + # phy = self.sdrphy, + # module = M12L64322A(sys_clk_freq, "1:1"), + # l2_cache_size = kwargs.get("l2_size", 8192), + # ) + # + # + # self.submodules.ethphy = LiteEthPHYRGMII( + # clock_pads= self.platform.request("eth_clocks", 0), + # pads = self.platform.request("eth", 0), + # tx_delay = 0e-9, # not sure what this is + # ) + # self.add_csr("ethphy") + # # self.add_etherbone(phy=self.ethphy, ip_address="192.168.0.36", mac_address = 0x10e2d5000001, data_width=32) + # if use_spi: + # from litespi.modules import W25Q32JV as SpiFlashModule + # from litespi.opcodes import SpiNorFlashOpCodes + # self.mem_map["spiflash"] = 0x20000000 + # mod = SpiFlashModule(SpiNorFlashOpCodes.READ_1_1_1) + # self.add_spi_flash(mode="1x", module=SpiFlashModule, with_master=False) self.platform.add_extension(make_hub75_iodevice(0, "j8")) hub_io = self.platform.request("hub75_iodev", 0) self.submodules.hub75 = hub75 = Hub75VerilogDriver() @@ -85,7 +85,7 @@ def main(): trellis_args(parser.add_argument_group('Trellis options')) args = parser.parse_args() platform = Groovy1Platform() - soc = GroovySoC(platform, 60e6, **soc_core_argdict(args)) + soc = GroovySoC(platform, 120e6, **soc_core_argdict(args)) builder = Builder(soc, **builder_argdict(args)) diff --git a/verilog/coordinator.sv b/verilog/coordinator.sv index b29af0e..b6b1232 100644 --- a/verilog/coordinator.sv +++ b/verilog/coordinator.sv @@ -38,30 +38,12 @@ module coordinator ( ); - // slicer signals - wire bitslice_start; - wire [5:0] bitplane_data; - wire [10:0] bitplane_addr; - wire bitplane_wren; - wire bitplane_done; - - bitslicer bslice ( - .clk(clk), - .rgb0(pix_rgb0), - .rgb1(pix_rgb1), - .pixnum(x), - .start_write(bitslice_start), - .bitplane_data(bitplane_data), - .bitplane_addr(bitplane_addr), - .bitplane_wren(bitplane_wren), - .done(bitplane_done) - ); // bram signals - wire [8:0] din; - wire [8:0] dout; - wire [10:0] addr_r; + wire [35:0] din; + wire [35:0] dout; + wire [8:0] addr_r; wire read_clk; wire write_clk; @@ -117,6 +99,7 @@ module coordinator ( state <= StateGenerateLine; end StateGenerateLine: begin + // generate data, move it into the bram. pixgen_start <= 0; if (bitplane_done) begin if (x < 128) begin diff --git a/verilog/hub75e.sv b/verilog/hub75e.sv index fbdc88f..c57a82d 100644 --- a/verilog/hub75e.sv +++ b/verilog/hub75e.sv @@ -9,32 +9,28 @@ module hub75e ( output reg done = 0, // bram interface (using clk) - output [10:0] pixbuf_addr, - input [ 8:0] pixbuf_data + output reg [8:0] pixbuf_addr, + input [35:0] pixbuf_data ); - parameter integer ROW_DEPTH = 128, BIT_DEPTH = 8; + parameter ROW_DEPTH = 128, BIT_DEPTH = 8, BCM_LEN = 32; reg [31:0] counter = 0; - reg [ 3:0] bcm_shift = 7; // which bit of the colors are we currently exposing. + // which bit of the colors are we currently exposing. + reg [$clog2(BIT_DEPTH) - 1:0] bcm_shift = 7; - localparam integer StateInit = 0; - localparam integer StateWriteRow = 1; - localparam integer StateLatchout = 2; + localparam StateInit = 0; + localparam StateWriteRow = 1; + localparam StateLatchout = 2; // the last data that we clock out for the row won't be exposed // in the next writerow state because we'll change addresses. - localparam integer StateFinishExpose = 3; + localparam StateFinishExpose = 3; + + // this state is used to prefetch the first pixel so the cycle can work. + localparam StatePreload = 4; reg [7:0] state = StateInit; // our state - - - // initial begin - // state <= StateInit; - // counter <= 0; - // bcm_shift <= 7; - // end - // The FSM is a bit confusing since it's optimized for *speed* // We can basically display the previous line of data while we write the // next one. So instead of having WRITEROW -> LATCH -> EXPOSE @@ -47,15 +43,21 @@ module hub75e ( // short! wire should_clock, should_expose; - assign should_clock = (counter < ROW_DEPTH * 2 + 1); // the plus 1 is for the falling edge! - assign should_expose = (counter < (16 << bcm_shift + 1)) && (bcm_shift != 7); + // the plus 1 is for the falling edge! + assign should_clock = (counter < (ROW_DEPTH << 2) + 1); + assign should_expose = (counter < (BCM_LEN << bcm_shift + 1)) && (bcm_shift != 7); + + wire [7:0] ram_r, ram_g, ram_b; + assign ram_r = pixbuf_data[23:16]; + assign ram_g = pixbuf_data[15:8]; + assign ram_b = pixbuf_data[7:0]; + wire [2:0] ram_rgb_slice; + assign ram_rgb_slice = { + (ram_r[bcm_shift]), (ram_g[bcm_shift]), (ram_b[bcm_shift]) + }; + reg [7:0] pixnum; - assign pixbuf_addr = {bcm_shift, pixnum}; - always @(*) begin - panel_rgb0 = pixbuf_data[2:0]; - panel_rgb1 = pixbuf_data[5:3]; - end always_ff @(posedge clk) begin counter <= counter + 1; @@ -65,25 +67,61 @@ module hub75e ( counter <= 0; done <= 0; pixnum <= ROW_DEPTH - 1; + pixbuf_addr <= {1'b0, pixnum}; // wait for the signal to write out our lines. if (write_trig) begin - state <= StateWriteRow; + state <= StatePreload; end end + StatePreload: begin + case (counter[1:0]) + 2'b00: begin + // wait for pix 1 + pixbuf_addr <= {1'b1, pixnum}; + end + 2'b01: begin + // load pix 1 + panel_rgb0 <= ram_rgb_slice; + end + 2'b10: begin // rising edge + // store pix2 + panel_rgb1 <= ram_rgb_slice; + // go to writerow + counter <= 0; + state <= StateWriteRow; + end + default: begin + counter <= 0; + end + endcase + end + StateWriteRow: begin if (should_clock) begin // we have data to clock - display_clk <= counter[0]; - if (~counter[0]) begin - // the data from the previous cycle is now ready. - // panel_rgb0 <= pixbuf_data[2:0]; - // panel_rgb1 <= pixbuf_data[5:3]; - // write it out! - end else begin - // update the bram address so it's ready at the next clock cycle. - pixnum <= pixnum - 1; - end + display_clk <= counter[1]; + case (counter[1:0]) + 2'b10: begin // rising edge + // fetch pixel 1 + pixbuf_addr <= {1'b0, pixnum}; + end + 2'b11: begin // midpoint of high clk. + // fetch pixel 2, load pixel 1 + pixbuf_addr <= {1'b1, pixnum}; + panel_rgb0 <= ram_rgb_slice; + end + 2'b00: begin // falling edge + // load pixel 2 + panel_rgb1 <= ram_rgb_slice; + pixnum <= pixnum - 1; + end + 2'b01: begin // midpoint of low clk + // decrement pixnum + end + default: begin + end + endcase end if (should_expose) begin out_enable <= 0; @@ -122,7 +160,7 @@ module hub75e ( StateFinishExpose: begin assert (bcm_shift == 0); - if (counter < (16 << bcm_shift)) begin + if (counter < (BCM_LEN << bcm_shift)) begin out_enable <= 0; end else begin out_enable <= 1; diff --git a/verilog/lineram.v b/verilog/lineram.v index 022b959..cf7921e 100644 --- a/verilog/lineram.v +++ b/verilog/lineram.v @@ -1,6 +1,6 @@ module lineram #( - parameter DATA_WIDTH = 9, - parameter ADDR_WIDTH = 11 + parameter DATA_WIDTH = 36, + parameter ADDR_WIDTH = 9 ) ( input [DATA_WIDTH - 1:0] din, input [ADDR_WIDTH - 1:0] addr_w, diff --git a/verilog/tb/hub75e_tb.sv b/verilog/tb/hub75e_tb.sv index ae6dc2e..979da97 100644 --- a/verilog/tb/hub75e_tb.sv +++ b/verilog/tb/hub75e_tb.sv @@ -36,7 +36,6 @@ module hub75e_tb; hub75e dut ( .clk(clk), .write_trig(write_trig), - .addr(addr_out), .panel_rgb0(rgb0), .panel_rgb1(rgb1), .display_clk(display_clk), @@ -56,12 +55,12 @@ module hub75e_tb; bram_addr_w <= 0; bram_write_en <= 1; repeat (1) @(posedge clk); - for (int i=0; i < 128; i=i+1) begin - bram_data_in <= $urandom % 'hFFFFFF; + for (int i=0; i < 512; i=i+1) begin + bram_data_in <= i + 5; bram_addr_w <= i; repeat (1) @(posedge clk); end - bram_write_en <= 1; + bram_write_en <= 0; write_trig <= 1; repeat (2) @(posedge clk); write_trig <= 0; @@ -71,7 +70,7 @@ module hub75e_tb; $finish(); end initial begin - repeat (100000) @(posedge clk); + repeat (500000) @(posedge clk); $finish(); end endmodule