Transformation and VHDL Code Generation from Coarse-grained Dataflow Graph by Oh, Moonwook & Ha, Soonhoi
Transformation and VHDL Code Generation
from Coarse-grained Dataflow Graph
Moonwook Oh
Department of Computer Engineering, Seoul National University





2 Correctness of Operation 3
















































  /.0%b=' 3 ;;+	 4	>D+@4&	C
 --)-+--+-+-+-
13















K;   %b "










































	   C$ @&	ObA	 +P	D> 3 ;4G3	
 ! @&	4P	
	   C$ @&	Ob
 $2b?3&b 4	3&bg;;d3




O 4U 	3D>  U27 (5 5> 	


















>  ='Ab	 72@Q	43?;&	>2O O5>?	




 ! >D 
);7>OPQ2O
;3 4
%	 b4>DO b4 3PO> 
3 PO>D 3&bO ; 6@ O 43 






















YP@ 	  34		  b@b >7>V6O 43 B34	C
D]Vb; d	C
	   C$







">? 	;O;')* 	a ;6 OB 3 B3	P 
b;

4	  ='  .   
	85Z3 ;;4@ 4"7Q
PQZ_S4	"[  _ >   "
D
	)["4D@ "7"D
P4 G$2b?3&b  B4D@U;7>O 4R ^"45>=	C
D	>9<746!
   	>?6P542	P')* 	>@P$2Db 	8'	   
Y=Ub Q5  X+b '89* 	4 'Y 
Q>UDB3 
4	4  =' @4
	











	>+3 &3 5D 
 	 =' 4
3 
DO;
D  %b ;4D@
O	bg9b4-">?2?>D>56&	C
4R @A  	 W%%b




















	 ='`3 ;;V@"" Q5	>>DD   43	5d


















Figure 1.1: (a) The design path of current EDA tools. (b) The design path of DFG
based future EDA tools.
')* D       K   FG5;dPQ 2b4 	












P6@B34	Q>  D   K  9	 D P(3	6 )b  474  	D> gO
;;53 
	3 
43  B4b	 7"D52Z "Qb4
 
4Xb	&;$%	
    G `b> 6P  ;6	 3;!
3 44 $2Db !T@
	D4 ')*+     a$GX	X	>  Q4 






	D  ')*+4ULN 	g3 Q	&6!T@
	D4g')* 
4	3&b "    $2 























































b4?VY2 4$%	&  $V ;O>?	 6VP;
	>?+ ='`3 ;;@X6 ">D
$2Db 	5;P	?3	>D> U@4








bR_S 	Q[="9	>> U$G)	 
-3 3 >5D  
3 >?	




%bG'89* P">"DV_S4	"[R 	6 + 
b"3&b
5A	C
	   C$ #^'8 A@&	Ob    















U	 =' 3 ;;U$G b5> DPO>PQ
b4	Q?393	5>> )D b%3 ;;G6=b	A
b
45>D@=bB	&;$G	ZbB	Ab%	P
 b	 7"5DQ4;   Vb8
D@D	>B')*V;%b?Gab+3 3 4O;A3 
43  4F 
3 
43   $ V 	 



































Figure 2.1: (a) A sample DFG. (b) A multiplexer for a feed-back loop. (c) An output
register for firing control.
Y P  	3
VR'89*  %	 6
	)4@?




	P G6b5>?PO;;5B3 ; 6@	 dD?	> 




	   9D@B











>D>4   	U 6)D@	>    %-Y=>D>9bd










































DP $2Db 3 
43 DO5;




























4   _S4	"[ b	 	












 d a$GP	>>D;3	V	UD@	>S3 
>D>=+4	3&b
;; a$G 	  b	 7X	  ;5O>D?3	R3 5
6
53W














	-?5 	8'	 @&	CAV_S46	B3  
	P3 
35D=3	   3&b	&	3 

 
4   >	43  ; 
&	CXO 
D; O C$G23 65VO
















3 OB)A43&b Q5 265B3&b 	GOO >D@ ;O	&	>>D4>D
 
@     ;5;>?;@        





3.1 Transformations for Feed-forward DFGs
_S4	"[&6d

	P	 @7 ')* Q 	 bP@45d@&	Ob $2b4R4	3&b
O Z	P;; 3 65V %O;;534%>   
	PO>+OB4D&	C







 DQ 	 D@>D!T&	C

"64R[PO	












 ! &	C+;; 
Y=P$G>>2	dO	&	>>D4>D
 
@  3 5
4Q>  _S 	Q[ 65OOB6&d	5;P	?3	>GODO >DD@
 VO>D4V4QP	 OP	>D4
	 OB4;   XVO>D4V4Qd5B3WB	>2OOB4>D;!
@($2Db 5;O58




>>D44 `b4 _S4	Q[ O 









65P (	%3  		>B>@3 W 	; 
D	>

@?6&A	 ;4 B 3	56 ;?
D@)5O5;S













3.2 Transformation for Recursive DFGs
L -	 '89* b		  44"! 	3
 >D"O $2Db`;4>	    G$G 3	>>2D 	  3 5&67R')*V
Y>Db5@b(O	&	>>
 














Db   . 
3.2.1 Unfolding Transformation
L 	V'89* b	G	 d44"! 	3




































b	 ; 5 S&	 
PC74 	>>>D"OZPb')* "	     b=5 Ib
3 PO5;
	dP4Z 	>>;;; SDP>QO 	B  9
b2"5  ZI;4>	 -4>D4PQ

X>"O    T   34	>> 	'89*,
3&b ;5>
rate optimal





	  5  %b-')*]R@; D  	 ab	G	UD4
	UO 


















')*   2 L G$d5;>? 	 g'89*  
optimum unfolding factor
       1
$2b?3&b(?a
b=>4	%3 VP( 5>DO>+ b8"5 B4A>D"OU;>?	 ; (
b8@	>





















                
    


                            
























                
    B1 C1 A2 B3 C3 A4
A1 B2 C2 A3 B4 C4
P1
P2
0 20 70 140
( a )
0 20 60 0 30 40
( c )
( b )
Figure 3.1: (a) Node execution times of A, B, and C are 10, 20, and 40, respectively.
The iteration bound is 35 time units and the critical path time (or the iteration period)
is 60 time units. (b) A retimed DFG of (a). The critical path time is reduced to 40 time




an overlapped rate optimal schedule.
 	3 
)6	>D>)bB	         D K      dL -?8POB6&	Q+Xb	 7P	X6	>>+5;!
>?;@V 	3   434	5)	V>?	
@)5;>;@V 	3 G@D74%	V>	@="5 B42"4%
b)5;>?;4X')* 	X	%	d3 Q543 )D%3 	4b-3PO> ;  9
3&b45> !
@OB> "P	>D>  	8 9
b 	>>D 5>?;D@ 	3 &2$%	2OOBQ6      9	
   2 	 D2@D7  
     	   
    
$2b4  ?S
b ; 3 5;
Dd
DP ;;	     S
b
 Q5  D4
	PO 
D; 








² gprg means generalized perfect rate graph. A DFG is gprg iff its iteration period is greater than or equal




5;4V	>    d5> 






)9@;  #3 W `b>)	
C74>?	OOB 3&b ;5>)34	  +4	6> PPO>D4PQ ( d5> 
DO;3  G ;64P "b
?%2
b 3	6+ ^"L%;4@ 
 )5)	
5P bB	C+$d$G	=R;4D@ 	 ^"L2$2b3&b  ;435; 
bd
3&b45>D
 9D@B2   3 W   	5P b	R	>D>+;; X	 3  	C
D	>)>@?3 	`
b	C
>  ;4>	  >PQ
3
 6O B  




) ;435; 	C)Pd5     B 3	56dbd7C	>5VF)V6b5>?
  	Q
	  5Q>=
b " 3 5;
D`V[%. 3PO> 
4 #	C V 5D   W %b







	6a	aP=5    -	C 
ab8 ;435;a F)=	B
Y8.V	-3 PO>D  
Remark 1 In VLSI design, all nodes in a delay-free path are combinational logic and
they must not change their output values until the last node finishes its operation.
C$h$ 







43 >  	OO>D?3	 >-D ^"L2; 6@ %(6>D78
b?O
 > $










































output at time offset of      each iteration interval, a buffering register in 
may have a periodical activation schedule,
   
	      
.  without corrupting
the original functionality.
9D@BG  . O?3W&(
b P;;4 '89*f 9D@BG D  3 (	C 
 D@ 	 5;HI

@?6-	 
b	3 7 	 3&b4;5>PGb 5H 4 4@?









&	&3 #Y8.  F) 9	VP	&3  [%. Y  	23>D;3
 













                        
B1 A2 C3 A4

















Figure 3.2: The DFG has a buffer register on arc (B2,C2). Because the buffer register
preserves the computation result of ancestor nodes, node B1 can start the next itera-
tion before node C2 finishes. The control signal diagram below the schedule shows
activation timing of buffer register B.




















, schedule : output)
Step 1: Let schedule be an empty set.
Let
  
= iteration bound of
	 
.
Step 2: If there is any path of execution time    




          .
Step 4: While traversing        from start to end,                 "! 
(
 "!  is the execution time of the current node)  # "! 
If
     then
place a buffer register before the current node.
put
   	         %$  "!  to schedule.  
.
end if
Step 5: goto Step 2.
%b	 
"Z+bY>@









		U$2>> B 3>	   U	 ;	PO>D-R3&b	O;
=K
  &	6
	C  	 










DO>DR3&b?3  d	 > 
b 3&b" 	 	OO> R	; Q5	 &	6
	CR
43&b Q5 )	33&;D@Ub4D





hb8b4O	6&  b 
44> -
b	C2	 V$2> (;7>O 4V
43&b;!







	3&bP6aA3&b ;5>D@  3&bQ543	 B8DPO>PQ


















	   9D@B    %bV"5  +$2Db 	UO	4Qb 6?=?8bP" 3 5;!
 PXb ;;   ;;RY 	 [h
	 
R. 3 >;3
 3 ;3 >4 P ; 3 5;
Da	B
;; F &	 
    3>D;3
 3 ;3 >44gY $GR  	3
 ;4>	  >PQ&  'd  '+.  	

@74  P5 $2bD>=OO >D@d4@?








Figure 4.1: A sample DFG with pipelining registers
%b2	 ; 5,b
5@bO5;ZB























 )b O &	C 2	 	 >DU@&	C



























>43WGb-DD?	> 7C	>D5 GD6 	UA
b+>	
3&b U7C	>D5  
 G








C7";8b-DD?	>A7 	>D54%5Q>  bg	 C
bg3 ;3 >+ 6O 43 7> 


















b 3"U	 	 >(@4&	C

 9@    . 
Hold loop



























port( CLK : IN boolean, -- system clock
RST : IN boolean, -- system reset
P1,P2,P3 : OUT boolean,
D1,D2 : OUT boolean );
end EnableGenerator;
Architecture Bev of EnableGenerator is
begin
Process begin
-- For synchronization with system reset
Reset_loop : loop
P1 <= FALSE; P2 <= FALSE; P3 <= FALSE;
D1 <= FALSE; D2 <= FALSE;
-- Main loop of the iteration period 3
Main_loop : loop
--State 1
wait until CLK'event and CLK=TRUE;
if RST=TRUE then exit Reset_loop; end if;
P1 <= FALSE; P2 <= FALSE; P3 <= FALSE;
D1 <= FALSE; D2 <= FALSE;
--State 2
wait until CLK'event and CLK=TRUE;
if RST=TRUE then exit Reset_loop; end if;
--State 3
wait until CLK'event and CLK=TRUE;
if RST=TRUE then exit Reset_loop; end if;
P1 <= TRUE; P2 <= TRUE; P3 <= TRUE;
D1 <= TRUE; D2 <= TRUE;
end loop; -- Main_loop
end loop; -- Reset_loop
end Process;
end Bev; -- Architecture
Figure 4.2: The VHDL code for an enable generator
13
Entity ResetGenerator is
port( CLK : IN boolean, -- system clock
RST : IN boolean, -- system reset
D1_InitVal, D2_InitVal : OUT boolean );
end ResetGenerator;




D1_InitVal <= TRUE; D2_InitVal <= TRUE;
Main_loop : loop
--State 1
wait until CLK'event and CLK=TRUE;
if RST=TRUE then exit Reset_loop; end if;
...
--State 6
wait until CLK'event and CLK=TRUE;




wait until CLK'event and CLK=TRUE;
if RST=TRUE then exit Reset_loop; end if;
D2_InitVal <= FALSE;
-- To hold D1, D2 in FALSE state.
Hold_loop : loop
wait until CLK'event and CLK=TRUE;
if RST=TRUE then exit Reset_loop; end if;
end loop; -- Hold_loop
end loop; -- Main_loop
end loop; -- Reset_loop
end Process;
end Bev; -- Architecture










3&bP(	B bX "Qb 6?-
45>D
 	O 64
4  b(>>DC$2@R$G  	PO>4

4OB 3W74> 
5.1 Implementation of a Feed-forward DFG
Y=V$GRb	 7 V4Q  	>D




P	3 %bX "Qb 6
 










-4>	bO   $G4R;; F
	BR[8
%b DPO>46($%	  - "Qb4?P
 @&	Cg	 8'	] >?  9@KD   
P$2b?3&b 3 Q
	P




P  b	 7"&	>9 "Qb 6?=">?)56@UbP@4&	C
4 8'	 	)	 DO5;4 LN b
	OOQ	3&bIR
D5+3 B6?;&	C






3&b ;5>D@  DB;D@  	B 6 
 
)3 3 4Q&	C 
 b  ! @&	 U')*,@&	C (
Eb 8'	a
FG5; Dd?  B6474 b	 $UP	  b	 7   4dQQ
b4  65>D
 $2b $GP 
gPO
C7Vb(;
 D b@b 	B

	3W >74>#X%b"5-D  VPO
 4
	 >
 3?; ')* 

		 	 






	@)^"P bD@bg>D47>S; 6@ ;
>
 D 9  
 B
	 C7>)3&b ;5>D@ -
b @	>8')* 	B`DB3 
O &	C Pb 
45>DU$2b>D  
@4&	C
4G	 ='  








    





    

















Figure 5.1: Diverse VHDL generation schemes for an original DFG.
=' $2b?3&b 3Q
	DB6Vg3Q




    );"4UX65OOB6 ')* 

	6





















b@b4 	6&	3 V>D474> 7  POB6&	Q B 3	562Da>4	S 	  4 =' 3 ;;
	B 	@Q	gb =' 3 ;;V 65>D











5.2 Buffer Registering and Reduction of Area Cost
  5 	
65V $ $G	U PO>PQX	 V 3 
D?3	>)	>@Db ;4O?3W `
9D@B"K .  	     "U	 ^"L  
b	CG Gb	ab8 ;435; 6O     +P85 &aO 
D&	C
D Z%b?% ;435;X6O  X34	 B- ;
	D   (Pb4GP 	%;3O;2&	C

OP	>PO>PQ
	  #_ >4	(?3 Ub	d
bX')* D @ZK;/. #	  b	 b
D&	C
D  5P    










3&b4;5>DV$G	)+DPO>P4&	>PD 	9^"L8	 $GPb	   	 C7
>	OO 4
16




@7 2	RC74>?	OO 4 3&b4;5>aL S$G-
5Xb 	>D@
 
bE      $ 34	g64- 
?        %b  !T5;>?;4X')* %>>D56&	C
4 Dg9@BK .    
Y>Db5@b $) 
	 U	d
	=O;	> 	9^"L    C! 5;>D@ " 2
 Q5   5B3&b
	
4	U3 64-L $VDB646-	 5H 4)
@64)	33&;D@RY>@Db   bV ;?
D@
	>@Db UC74>?	OOB  d5> 
DO;3  (
3&b ;5>4(34	   	OO>D4 
 	9^"L
; 6@  `b4 $GP5 Pa




	   
>    .-5;>D@BSL P4	 
b	Cb+	
4	-36?a






D d9@ K;/. #3 86bC$=
b. ! 5;>?; 
')*,$2b3&bX?2 53&bRDPO>2
b	 bB	CZ9D@BK;/.   W
Figure 5.2: (a) Original DFG. Nodes A, B, C, D, and E require 20, 5, 10, 10, and 2 time units,
respectively. The iteration bound is 16, the maximum node execution time is 20 and the
critical path time is 20 time units. (b) A 6-unfolded DFG which allows nonoverlapped
rate optimal schedule. A critical path is (D1,E1,A2,D3,E3,A4,D5,E5,A6) and requires
96 time units, which corresponds to $6 \times 16$ (the unfolding factor times the
iteration bound). (c) A 2-unfolded DFG which has an
overlapped rate optimal schedule. It is much simpler than the DFG in (b).
17
Silicon area (# of Xilinx CLB)
Total system 131
Control logic 39
Overhead of control logic 29.8%




Table 5.1: Characteristics of the synthesized hardware





 	 O	dbgc)Y=e  ;6 	d	
 	VO>DX	OO>?3	C
D      %bX9@/K;   #	  bC$ bR')* b B	>2&	@U
c)Y=e  %b-5 ;!N"64\3 6
)2	 






4 	 	=' 3 ;;P	 "Qb 6
 
 g +5D@X^ ";!
OB ;   '84@ [PO>D44 %b+b	























>9b( ;6R `b4 	>D> 	
	X>D  	
VDPO> !






























Figure 5.3: (a) A DFG representation of a module of QAM (b) A block diagram








	   C$ @&	Ob 
3 ;; 6@V&	P$G 
d_S 	Q[  _Z
> - "46(	G[;; 6@  "7"PQ 9%b
	8'	 3"d?+ "Qb 6
 









43  8GO &	C +@5	&	Q
4 
YBV_S 	Q[ OB46SOO >D@-

	6















)=5OO        >D47+
b	C2






Q 'Y ">? 	6 
 =' ; 3O;A
Y=- ;6@P 	3D>D4 b
 	
 6P+O342$2b?3&b 	
+   3 "!

4 	 6>D74 a[5

Q>  ;_S4	"[ 34	G@4







  	 3&b	@ a" 3 5;
D 





























D  ^ "O ;-LNB3      Aeg?;> B>? 1  9eg5Q
	D =$  [GY          <8^Y 
COSSAP User’s Manual : VHDL Code Generation

 .     FG53











Simulation, special issue on Simulation Software Development




     
   ] ^"5@  e 	8b [8ILN  	 ^I 8	+'=4P6&	C
Dg3"4D@g$
   C$
 O 4	3  LN
Proc. of International Conference of VLSI Circuit
^;5> + 	 
     






	   C$ @&	ObgOB 3 B3	4LN
30th Asilo-
mar Conference on Signals, Systems, and Computers
_Z	3 B3%*)C7 [G	>D
	 
<8^Y  =C74  +    
 K  + *) 





 	C&	   C$`@
	Ob4 LN
Proc. of the Design Automation Conference

  K
  d[8d  	@ 		PP_Z	
bTB%b8	&Zb@b;! >D474>O( "Qb 6? ;
RILN
VLSI Design Methodologies for Digital Signal Processing Architectures

+>D5"$G
Y=34	;4P3-_a  I     

















	!   C$ O
@&	
7"	VO; 5 5;>D?;@
IEEE Trans. on Computers
    #.   5B	    
21
  'd     	@ 	 d   =5  5>>  6
	C
3Xd5> 
DO;3  	&	   	>
 
	>D!
D  3 
D
?	()
4	> !TPP 3 5
4Q)O 	OO>?3	C
D4
IEEE Trans. on Signal
Processing
   . TK  e 	       
D  [8=  4&6AGA12Q6 	   ^	 ; 	8O;
DP
 
@ QB3&b5B)3D&3 5 

 PD@B LN
Proc. Third Caltech Conf. VLSI




[GY  e 	









Journal of VLSI Signal Processing
  OO  .
 
       K
D . -eg""$G" 





Proc. 35th Design Automation Conf.
^;	 &	3
3  =[GY     5




= K      OO 
K  
 
K    	)3  B4)     
D    S*V   @g	  *VZ[bAX1	!TO;	> O  "Qb4  ODO >U	B
PDDd5 5;>?;@
IEEE Trans. on VLSI Systems
B. 6  e 	&3&b      










O	@4SOO   K
 
      e 	 U  








OO  K  K
 
K        .
D    =  53
d	B P P _Z	






D D bD@b;!T>7>a6O "Qb 6?
IEEE Trans. on CAD of IC and
Systems
  .        5>     
22
