-
Notifications
You must be signed in to change notification settings - Fork 129
/
Copy pathfpnew_opgroup_multifmt_slice.sv
564 lines (514 loc) · 26.4 KB
/
fpnew_opgroup_multifmt_slice.sv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
// Copyright 2019 ETH Zurich and University of Bologna.
//
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// SPDX-License-Identifier: SHL-0.51
// Author: Stefan Mach <[email protected]>
`include "common_cells/registers.svh"
// Multi-format operation-group slice.
// Generates one multi-format unit per SIMD lane for the operation group chosen
// by OpGroup, slices the Width-bit operands into lane-local operands, and packs
// the lane results back into a single Width-bit result per destination format.
// Handshake, tag, extension bit and aux information are taken from lane 0.
module fpnew_opgroup_multifmt_slice #(
// Operation group; selects which unit is instantiated in every lane
parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV,
// Width of the operands and of the result, in bits
parameter int unsigned Width = 64,
// FPU configuration
// Format configuration vectors handed to the fpnew_pkg helper functions
parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1,
parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1,
// When 0, vectorial_op_i is ignored and only lane 0 is generated
parameter logic EnableVectors = 1'b1,
// Selects the div/sqrt unit instantiated when OpGroup is DIVSQRT
parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI,
// Forwarded to the lane units; also the depth of the CONV bypass pipeline
parameter int unsigned NumPipeRegs = 0,
// Forwarded unchanged to the lane units
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE,
// When set, lane results/status are not muted by the lane valid signal and the
// div/sqrt SIMD synchronization signals are tied to zero
parameter logic ExtRegEna = 1'b0,
// Type of the tag that travels alongside each operation
parameter type TagType = logic,
// Do not change
localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup),
localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS,
localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors),
// One mask bit per SIMD lane
localparam type MaskType = logic [NUM_SIMD_LANES-1:0],
// reg_ena_i is at least one bit wide, even without pipeline registers
localparam int unsigned ExtRegEnaWidth = NumPipeRegs == 0 ? 1 : NumPipeRegs
) (
input logic clk_i,
input logic rst_ni,
// Input signals
input logic [NUM_OPERANDS-1:0][Width-1:0] operands_i,
input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i,
input fpnew_pkg::roundmode_e rnd_mode_i,
input fpnew_pkg::operation_e op_i,
input logic op_mod_i,
input fpnew_pkg::fp_format_e src_fmt_i,
input fpnew_pkg::fp_format_e dst_fmt_i,
input fpnew_pkg::int_format_e int_fmt_i,
input logic vectorial_op_i,
input TagType tag_i,
input MaskType simd_mask_i,
// Input Handshake
input logic in_valid_i,
output logic in_ready_o,
input logic flush_i,
// Output signals
output logic [Width-1:0] result_o,
output fpnew_pkg::status_t status_o,
output logic extension_bit_o,
output TagType tag_o,
// Output handshake
output logic out_valid_o,
input logic out_ready_i,
// Indication of valid data in flight
output logic busy_o,
// External register enable override
input logic [ExtRegEnaWidth-1:0] reg_ena_i
);
// Elaboration-time sanity checks for the DIVSQRT operation group:
//  - the TH32 unit is rejected unless format 0 is the only enabled FP format
//  - the THMULTI unit only warns when format 3 is enabled
if ((OpGroup == fpnew_pkg::DIVSQRT) && (DivSqrtSel == fpnew_pkg::TH32) &&
    ((FpFmtConfig[0] != 1) || (FpFmtConfig[1:NUM_FORMATS-1] != '0))) begin
  $fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. \
Set DivSqrtSel = THMULTI or DivSqrtSel = PULP to use a multi-format divider");
end else if ((OpGroup == fpnew_pkg::DIVSQRT) && (DivSqrtSel == fpnew_pkg::THMULTI) &&
             (FpFmtConfig[3] == 1'b1)) begin
  $warning("The DivSqrt unit of C910 (instantiated by DivSqrtSel = THMULTI) does not support \
FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8.");
end
// -------------------
// Derived constants
// -------------------
localparam int unsigned MAX_FP_WIDTH      = fpnew_pkg::max_fp_width(FpFmtConfig);
localparam int unsigned MAX_INT_WIDTH     = fpnew_pkg::max_int_width(IntFmtConfig);
// Lane counts are computed with vectors enabled; lanes that end up unused are
// tied off inside the lane generate loop
localparam int unsigned NUM_LANES         = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1);
localparam int unsigned NUM_DIVSQRT_LANES =
    fpnew_pkg::num_divsqrt_lanes(Width, FpFmtConfig, 1'b1, DivSqrtSel);
localparam int unsigned NUM_INT_FORMATS   = fpnew_pkg::NUM_INT_FORMATS;
// The format information travels along with the data: enough bits to encode
// either an FP or an integer format ...
localparam int unsigned FMT_BITS =
    fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS));
// ... plus the vectorial and the integer flag
localparam int unsigned AUX_BITS = FMT_BITS + 2;

// ------------------------
// Per-lane handshake bits
// ------------------------
logic [NUM_LANES-1:0] lane_in_ready;
logic [NUM_LANES-1:0] lane_out_valid;
logic [NUM_LANES-1:0] divsqrt_done;
logic [NUM_LANES-1:0] divsqrt_ready;

// -------------------
// Input-side signals
// -------------------
logic                vectorial_op;
logic [FMT_BITS-1:0] dst_fmt;   // destination format to pass along with operation
logic [AUX_BITS-1:0] aux_data;
// additional flags for CONV
logic                dst_fmt_is_int;
logic                dst_is_cpk;
logic [1:0]          dst_vec_op; // info for vectorial results (for packing)
logic [2:0]          target_aux_d;
logic                is_up_cast;
logic                is_down_cast;

// ----------------------------------
// Packed results, one row per format
// ----------------------------------
logic [NUM_FORMATS-1:0][Width-1:0]     fmt_slice_result;
logic [NUM_INT_FORMATS-1:0][Width-1:0] ifmt_slice_result;

// vectorial conversions update a register
logic [Width-1:0] conv_target_d;
logic [Width-1:0] conv_target_q;

// ------------------
// Per-lane outputs
// ------------------
fpnew_pkg::status_t [NUM_LANES-1:0]  lane_status;
logic [NUM_LANES-1:0]                lane_ext_bit; // lane 0 drives extension_bit_o
TagType [NUM_LANES-1:0]              lane_tags;    // only the first one is actually used
logic [NUM_LANES-1:0]                lane_masks;
logic [NUM_LANES-1:0][AUX_BITS-1:0]  lane_aux;     // only the first one is actually used
logic [NUM_LANES-1:0]                lane_busy;    // OR-reduced into busy_o

// --------------------
// Output-side signals
// --------------------
logic                result_is_vector;
logic [FMT_BITS-1:0] result_fmt;
logic                result_fmt_is_int;
logic                result_is_cpk;
logic [1:0]          result_vec_op; // info for vectorial results (for packing)

// Div/sqrt lane synchronization
logic simd_synch_rdy;
logic simd_synch_done;
// -----------
// Input Side
// -----------
// Lane 0 is always generated, so its ready signal serves as the upstream ready
assign in_ready_o   = lane_in_ready[0];
// Vectorial requests are only honored if vector support is enabled
assign vectorial_op = vectorial_op_i & EnableVectors;

// CONV-only flags. Cast-and-Pack ops are encoded in operation and modifier.
assign dst_fmt_is_int = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::F2I);
assign dst_is_cpk     = (OpGroup == fpnew_pkg::CONV) & (op_i == fpnew_pkg::CPKAB ||
                                                        op_i == fpnew_pkg::CPKCD);
// Use a conditional select rather than a bitwise AND with the comparison: the
// comparison result is a single bit that `&` zero-extends to two bits, which
// would always clear dst_vec_op[1] (the CPKCD flag).
assign dst_vec_op     = (OpGroup == fpnew_pkg::CONV)
                        ? {(op_i == fpnew_pkg::CPKCD), op_mod_i}
                        : 2'b00;

// Cast direction, derived from the widths of source and destination format
assign is_up_cast   = (fpnew_pkg::fp_width(dst_fmt_i) > fpnew_pkg::fp_width(src_fmt_i));
assign is_down_cast = (fpnew_pkg::fp_width(dst_fmt_i) < fpnew_pkg::fp_width(src_fmt_i));

// The destination format is the int format for F2I casts
assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i;

// The data sent along consists of the integer flag, the vectorial flag and the
// destination format bits (decoded again on the output side from lane 0)
assign aux_data     = {dst_fmt_is_int, vectorial_op, dst_fmt};
assign target_aux_d = {dst_vec_op, dst_is_cpk};

// CONV passes one operand for assembly after the unit: opC for cpk, opB for others
if (OpGroup == fpnew_pkg::CONV) begin : conv_target
  assign conv_target_d = dst_is_cpk ? operands_i[2] : operands_i[1];
end else begin : not_conv_target
  assign conv_target_d = '0;
end
// Boxing information trimmed to the operand count of the consuming unit:
// is_boxed_1op keeps operand 0 only, is_boxed_2op keeps operands 1:0.
logic [NUM_FORMATS-1:0]      is_boxed_1op;
logic [NUM_FORMATS-1:0][1:0] is_boxed_2op;
for (genvar f = 0; f < NUM_FORMATS; f++) begin : boxed_2op
  assign is_boxed_1op[f] = is_boxed_i[f][0];
  assign is_boxed_2op[f] = is_boxed_i[f][1:0];
end
// ---------------
// Generate Lanes
// ---------------
// One iteration per potential SIMD lane. A lane either instantiates the unit
// selected by OpGroup (active_lane) or drives constant tie-offs (inactive_lane).
// Afterwards its local result is written into this lane's slot of every
// per-format packed result vector.
for (genvar lane = 0; lane < int'(NUM_LANES); lane++) begin : gen_num_lanes
localparam int unsigned LANE = unsigned'(lane); // unsigned to please the linter
// Get a mask of active formats for this lane
localparam fpnew_pkg::fmt_logic_t ACTIVE_FORMATS =
fpnew_pkg::get_lane_formats(Width, FpFmtConfig, LANE);
localparam fpnew_pkg::ifmt_logic_t ACTIVE_INT_FORMATS =
fpnew_pkg::get_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
localparam int unsigned MAX_WIDTH = fpnew_pkg::max_fp_width(ACTIVE_FORMATS);
// Cast-specific parameters
localparam fpnew_pkg::fmt_logic_t CONV_FORMATS =
fpnew_pkg::get_conv_lane_formats(Width, FpFmtConfig, LANE);
localparam fpnew_pkg::ifmt_logic_t CONV_INT_FORMATS =
fpnew_pkg::get_conv_lane_int_formats(Width, FpFmtConfig, IntFmtConfig, LANE);
localparam int unsigned CONV_WIDTH = fpnew_pkg::max_fp_width(CONV_FORMATS);
// Lane parameters from Opgroup
// CONV lanes use the cast-specific format set and width, all others the
// regular per-lane ones
localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV)
? CONV_FORMATS : ACTIVE_FORMATS;
localparam int unsigned LANE_WIDTH = (OpGroup == fpnew_pkg::CONV) ? CONV_WIDTH : MAX_WIDTH;
logic [LANE_WIDTH-1:0] local_result; // lane-local results
// Generate instances only if needed, lane 0 always generated
// Upper lanes require EnableVectors; for DIVSQRT, lanes at or above
// NUM_DIVSQRT_LANES are never instantiated
if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane
logic in_valid, out_valid, out_ready; // lane-local handshake
logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local operands
logic [LANE_WIDTH-1:0] op_result; // lane-local results
fpnew_pkg::status_t op_status;
assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors
// Slice out the operands for this lane, upper bits are ignored in the unit
always_comb begin : prepare_input
// Default slicing: shift each operand down by this lane's offset. Operand 2 is
// offset in units of the source format width for ADDS and of the destination
// format width otherwise; all other operands use the source format width.
for (int unsigned i = 0; i < NUM_OPERANDS; i++) begin
if (i == 2) begin
local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(op_i == fpnew_pkg::ADDS ? src_fmt_i : dst_fmt_i);
end else begin
local_operands[i] = operands_i[i] >> LANE*fpnew_pkg::fp_width(src_fmt_i);
end
end
// override operand 0 for some conversions
if (OpGroup == fpnew_pkg::CONV) begin
// Source is an integer
if (op_i == fpnew_pkg::I2F) begin
local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::int_width(int_fmt_i);
// vectorial F2F up casts
end else if (op_i == fpnew_pkg::F2F) begin
if (vectorial_op && op_mod_i && is_up_cast) begin // up cast with upper half
// `+` binds tighter than `>>`: the shift amount is the lane offset plus
// MAX_FP_WIDTH/2
local_operands[0] = operands_i[0] >> LANE*fpnew_pkg::fp_width(src_fmt_i) +
MAX_FP_WIDTH/2;
end
// CPK
end else if (dst_is_cpk) begin
if (lane == 1) begin
local_operands[0] = operands_i[1][LANE_WIDTH-1:0]; // using opB as second argument
end
end
end
end
// Instantiate the operation from the selected opgroup
if (OpGroup == fpnew_pkg::ADDMUL) begin : lane_instance
// ADDMUL: multi-format FMA; receives all lane operands and the full boxing info
fpnew_fma_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_fma_multi (
.clk_i,
.rst_ni,
.operands_i ( local_operands ),
.is_boxed_i,
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.src2_fmt_i ( op_i == fpnew_pkg::ADDS ? src_fmt_i : dst_fmt_i ),
.dst_fmt_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( aux_data ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] ),
.reg_ena_i
);
end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance
// DIVSQRT: one of three implementations, chosen by DivSqrtSel. TH32 is only
// taken when format 0 is the sole enabled lane format.
if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt
// The T-head-based DivSqrt unit is supported only in FP32-only configurations
// This unit has no SIMD synchronization ports; divsqrt_done/divsqrt_ready of
// this lane are left undriven in this branch
fpnew_divsqrt_th_32 #(
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_divsqrt_multi_th (
.clk_i,
.rst_ni,
.operands_i ( local_operands[1:0] ), // 2 operands
.is_boxed_i ( is_boxed_2op ), // 2 operands
.rnd_mode_i,
.op_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( aux_data ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] ),
.reg_ena_i
);
end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt
// Multi-format T-Head unit; exchanges done/ready with the slice-level SIMD
// synchronization signals
fpnew_divsqrt_th_64_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_divsqrt_th_64_c910 (
.clk_i,
.rst_ni,
.operands_i ( local_operands[1:0] ), // 2 operands
.is_boxed_i ( is_boxed_2op ), // 2 operands
.rnd_mode_i,
.op_i,
.dst_fmt_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( aux_data ),
.vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.divsqrt_done_o ( divsqrt_done[lane] ),
.simd_synch_done_i( simd_synch_done ),
.divsqrt_ready_o ( divsqrt_ready[lane] ),
.simd_synch_rdy_i ( simd_synch_rdy ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] ),
.reg_ena_i
);
end else begin : gen_pulp_divsqrt
// Fallback for every other DivSqrtSel value (and for TH32 when the lane
// formats are not FP32-only): the PULP multi-format unit
fpnew_divsqrt_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_divsqrt_multi (
.clk_i,
.rst_ni,
.operands_i ( local_operands[1:0] ), // 2 operands
.is_boxed_i ( is_boxed_2op ), // 2 operands
.rnd_mode_i,
.op_i,
.dst_fmt_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( aux_data ),
.vectorial_op_i ( vectorial_op ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.divsqrt_done_o ( divsqrt_done[lane] ),
.simd_synch_done_i( simd_synch_done ),
.divsqrt_ready_o ( divsqrt_ready[lane] ),
.simd_synch_rdy_i ( simd_synch_rdy ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] ),
.reg_ena_i
);
end
// No unit is instantiated for NONCOMP in this slice.
// NOTE(review): with OpGroup == NONCOMP the lane's op_result, op_status,
// out_valid and lane_* outputs have no driver here - confirm this opgroup is
// never mapped onto this module.
end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance
end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance
// CONV: multi-format cast unit; consumes only operand 0 and its boxing bit
fpnew_cast_multi #(
.FpFmtConfig ( LANE_FORMATS ),
.IntFmtConfig ( CONV_INT_FORMATS ),
.NumPipeRegs ( NumPipeRegs ),
.PipeConfig ( PipeConfig ),
.TagType ( TagType ),
.AuxType ( logic [AUX_BITS-1:0] )
) i_fpnew_cast_multi (
.clk_i,
.rst_ni,
.operands_i ( local_operands[0] ),
.is_boxed_i ( is_boxed_1op ),
.rnd_mode_i,
.op_i,
.op_mod_i,
.src_fmt_i,
.dst_fmt_i,
.int_fmt_i,
.tag_i,
.mask_i ( simd_mask_i[lane] ),
.aux_i ( aux_data ),
.in_valid_i ( in_valid ),
.in_ready_o ( lane_in_ready[lane] ),
.flush_i,
.result_o ( op_result ),
.status_o ( op_status ),
.extension_bit_o ( lane_ext_bit[lane] ),
.tag_o ( lane_tags[lane] ),
.mask_o ( lane_masks[lane] ),
.aux_o ( lane_aux[lane] ),
.out_valid_o ( out_valid ),
.out_ready_i ( out_ready ),
.busy_o ( lane_busy[lane] ),
.reg_ena_i
);
end // ADD OTHER OPTIONS HERE
// Handshakes are only done if the lane is actually used
// (upper lanes take part only when lane 0's aux data marks the result as vectorial)
assign out_ready = out_ready_i & ((lane == 0) | result_is_vector);
assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector);
// Properly NaN-box or sign-extend the slice result if not in use
// With ExtRegEna set, the unit outputs are passed through regardless of valid
assign local_result = (lane_out_valid[lane] | ExtRegEna) ? op_result : '{default: lane_ext_bit[0]};
assign lane_status[lane] = (lane_out_valid[lane] | ExtRegEna) ? op_status : '0;
// Otherwise generate constant sign-extension
end else begin : inactive_lane
assign lane_out_valid[lane] = 1'b0; // unused lane
assign lane_in_ready[lane] = 1'b0; // unused lane
assign lane_aux[lane] = 1'b0; // unused lane
assign lane_masks[lane] = 1'b1; // unused lane
assign lane_tags[lane] = 1'b0; // unused lane
assign divsqrt_done[lane] = 1'b0; // unused lane
assign divsqrt_ready[lane] = 1'b0; // unused lane
assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane
assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box
assign lane_status[lane] = '0;
assign lane_busy[lane] = 1'b0;
end
// Generate result packing depending on float format
for (genvar fmt = 0; fmt < NUM_FORMATS; fmt++) begin : pack_fp_result
// Set up some constants
localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
// only for active formats within the lane
// Active format: copy the low FP_WIDTH bits of the lane result into this
// lane's slot. Otherwise fill the slot with this lane's extension bit, either
// the whole slot (if it fits in Width) or the part that starts below Width.
if (ACTIVE_FORMATS[fmt]) begin
assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
local_result[FP_WIDTH-1:0];
end else if ((LANE+1)*FP_WIDTH <= Width) begin
assign fmt_slice_result[fmt][(LANE+1)*FP_WIDTH-1:LANE*FP_WIDTH] =
'{default: lane_ext_bit[LANE]};
end else if (LANE*FP_WIDTH < Width) begin
assign fmt_slice_result[fmt][Width-1:LANE*FP_WIDTH] =
'{default: lane_ext_bit[LANE]};
end
end
// Generate result packing depending on integer format
if (OpGroup == fpnew_pkg::CONV) begin : int_results_enabled
for (genvar ifmt = 0; ifmt < NUM_INT_FORMATS; ifmt++) begin : pack_int_result
// Set up some constants
localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
// Same slot scheme as for FP results, but unused slots are zero-filled
if (ACTIVE_INT_FORMATS[ifmt]) begin
assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] =
local_result[INT_WIDTH-1:0];
end else if ((LANE+1)*INT_WIDTH <= Width) begin
assign ifmt_slice_result[ifmt][(LANE+1)*INT_WIDTH-1:LANE*INT_WIDTH] = '0;
end else if (LANE*INT_WIDTH < Width) begin
assign ifmt_slice_result[ifmt][Width-1:LANE*INT_WIDTH] = '0;
end
end
end
end
// Fill the FP result bits that lie above the topmost lane with the extension
// bit of lane 0 (only needed when the lanes do not cover the full Width)
for (genvar f = 0; f < NUM_FORMATS; f++) begin : extend_fp_result
  localparam int unsigned FMT_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(f));
  localparam int unsigned USED_BITS = NUM_LANES*FMT_WIDTH;
  if (USED_BITS < Width) begin
    assign fmt_slice_result[f][Width-1:USED_BITS] = '{default: lane_ext_bit[0]};
  end
end

// Integer results: zero-extend above the topmost lane for CONV, and tie the
// whole vector to zero for every other operation group
for (genvar n = 0; n < NUM_INT_FORMATS; n++) begin : extend_or_mute_int_result
  if (OpGroup == fpnew_pkg::CONV) begin : extend_int_result
    localparam int unsigned FMT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(n));
    localparam int unsigned USED_BITS = NUM_LANES*FMT_WIDTH;
    if (USED_BITS < Width) begin
      assign ifmt_slice_result[n][Width-1:USED_BITS] = '0;
    end
  end else begin : mute_int_result
    assign ifmt_slice_result[n] = '0;
  end
end
// Bypass lanes with target operand for vectorial casts
// For CONV, the target operand and its 3-bit aux word travel through a
// NumPipeRegs-deep valid/ready pipeline next to the lane units. For all other
// operation groups the outputs of this section are tied to zero.
// NOTE(review): conv_target_q, result_vec_op and result_is_cpk are not read by
// any logic visible in this module - confirm whether they are still needed.
if (OpGroup == fpnew_pkg::CONV) begin : target_regs
// Bypass pipeline signals, index i holds signal after i register stages
logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q;
logic [0:NumPipeRegs][2:0] byp_pipe_aux_q;
logic [0:NumPipeRegs] byp_pipe_valid_q;
// Ready signal is combinatorial for all stages
logic [0:NumPipeRegs] byp_pipe_ready;
// Input stage: First element of pipeline is taken from inputs
assign byp_pipe_target_q[0] = conv_target_d;
assign byp_pipe_aux_q[0] = target_aux_d;
// Only vectorial operations enter the bypass pipeline as valid items
assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op;
// Generate the register stages
for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline
// Internal register enable for this stage
logic reg_ena;
// Determine the ready signal of the current stage - advance the pipeline:
// 1. if the next stage is ready for our data
// 2. if the next stage only holds a bubble (not valid) -> we can pop it
assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1];
// Valid: enabled by ready signal, synchronous clear with the flush signal
`FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni)
// Enable register if pipeline ready and a valid data item is present
// The external enable reg_ena_i[i] is ORed in and can force a load as well
assign reg_ena = (byp_pipe_ready[i] & byp_pipe_valid_q[i]) | reg_ena_i[i];
// Generate the pipeline registers within the stages, use enable-registers
`FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0)
`FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0)
end
// Output stage: Ready travels backwards from output side, driven by downstream circuitry
// The last stage is only drained while lane 0 reports a vectorial result
assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector;
// Output stage: assign module outputs
assign conv_target_q = byp_pipe_target_q[NumPipeRegs];
// decode the aux data
assign {result_vec_op, result_is_cpk} = byp_pipe_aux_q[NumPipeRegs];
end else begin : no_conv
assign {result_vec_op, result_is_cpk} = '0;
assign conv_target_q = '0;
end
// Lane synchronization for the div/sqrt units: the slice-level ready/done
// signals are the AND over all div/sqrt lanes, or lane 0 alone without vectors
if ((DivSqrtSel == fpnew_pkg::TH32) || ExtRegEna) begin
  // Unused (TH32 divider only supported for scalar FP32 divsqrt)
  assign simd_synch_rdy  = '0;
  assign simd_synch_done = '0;
end else begin
  // Synch lanes if there is more than one
  if (EnableVectors) begin
    assign simd_synch_rdy  = &divsqrt_ready[NUM_DIVSQRT_LANES-1:0];
    assign simd_synch_done = &divsqrt_done[NUM_DIVSQRT_LANES-1:0];
  end else begin
    assign simd_synch_rdy  = divsqrt_ready[0];
    assign simd_synch_done = divsqrt_done[0];
  end
end
// ------------
// Output Side
// ------------
// Lane 0's aux word describes the result: integer flag, vectorial flag and
// destination format (same layout as aux_data on the input side)
assign {result_fmt_is_int, result_is_vector, result_fmt} = lane_aux[0];

// Pick the packed result vector that matches the destination format
assign result_o = result_fmt_is_int
                  ? ifmt_slice_result[result_fmt]
                  : fmt_slice_result[result_fmt];

assign extension_bit_o = lane_ext_bit[0];   // don't care about upper ones
assign tag_o           = lane_tags[0];      // don't care about upper ones
assign busy_o          = (| lane_busy);     // busy while any lane is busy
assign out_valid_o     = lane_out_valid[0]; // don't care about upper ones

// Collapse the status: OR the flags of all lanes, ignoring masked-out lanes
always_comb begin : output_processing
  automatic fpnew_pkg::status_t temp_status;
  temp_status = '0;
  for (int i = 0; i < int'(NUM_LANES); i++) begin
    // Replicate the lane mask bit over the full status width; the width is
    // taken from the type so it follows any change to status_t
    temp_status |= lane_status[i] & {$bits(fpnew_pkg::status_t){lane_masks[i]}};
  end
  status_o = temp_status;
end
endmodule