Google
 

Trailing-Edge - PDP-10 Archives - SRI_NIC_PERM_SRC_3_19910112 - mm-new/fscopy.mac
There are 3 other files named fscopy.mac in the archive. Click here to see a list.
	TITLE FSCOPY - Fast String Copy
	SUBTTL Courtesy of KLH
	SEARCH MACSYM

;;;AC's

A=:1				;Source BP
B=:2				;Temp
C=:3				;Byte count
D=:4				;Local AC's follow
E=:5
T=:6
U=:7
V=:10
W=:11
L=:12
M=:13
N=:14
O=:15				;Destination BP

KLWINC==^D18			;# chars at which hairy word move starts
				; winning over BP loop

$STENT==1			;offset from beg of loop for entry to
				; STORE phase
$GENT==4			;offset from beg of loop for entry to GET phase

	.PSECT DATA

FSCPKL:	0			;a LSHC A,<n> for SHIFT-IN goes here
	LSH A,1
	MOVEM A,(C)		;Address of dest stored in RH here
	0			;a LSHC A,<n> for SHIFT-OUT goes here
	MOVE B,(C)		;Address of source stored in RH here
	AOBJN C,FSCPKL
	JRST @FENTRM(D)

	.ENDPS
	.PSECT CODE
FSCOPY::CAIL C,KLWINC		;Less than break-even point?
	IFSKP.
	  DO.			;Yes, can't win
	    ILDB B,A		;Simple byte-by-byte copying.
	    IDPB B,O
	    SOJG C,TOP.
	  ENDDO.
	  RET
	ENDIF.

;;;Wheee, using hairy word copying!

	STKVAR <<TMPACS,20>>
	HRLI B,D		;Save AC's D-O
	HRRI B,D+TMPACS
	BLT B,O+TMPACS

	LDB E,[POINT 3,A,5]	;Get low 3 bits of P field for source
	SKIPGE E,FSCHTB(E)	;Get resulting # chars, skip if addr ok
	 MOVEI A,1(A)		;P= 01, must bump address.
	MOVEI L,1(A)		;Anyway, get addr+1 into 12.
	LDB D,[POINT 3,O,5]	;Repeat procedure for dest
	SKIPGE D,FSCHT2(D)	;Using slightly different table
	 AOSA V,O		;And addr goes into 10
	  MOVEI V,(O)		;And isn't normally bumped.
	MOVEI O,(C)		;Update the destination pointer in o
	ADJBP O,O+TMPACS	;From initial value
;Now get index for shift values, and count for words
	SUBI C,(E)		;Get # chars minus those in 1st src wd.
	ADDI E,-6(D)		;Get E index - D*5+S, zero based.
	IDIVI C,5		;Find # words to loop through, rem in d
	MOVE B,-1(L)		;And get 1st word of source.
	JRST @FPATH(E)		;Must now pick a path...

;Blt possible!  Jump to FSBLT0 if no shifting needed for setup.

FSBLT0:	MOVEM B,(V)		;Store source word directly
	JRST FSBLT4
FSBLT:	LSH B,@SHASL(E)		;Shift source up against left
	MOVE A,(V)		;Get 1st wd of dest.
	LSH A,@SHADR(E)		;Right-adjust it
	LSHC A,@SHFIX(E)	;And get everything into A.
	LSH A,1			;Need one more bit's worth.
	MOVEM A,(V)		;Store 1st wd of dest...

;Now settle down to serious BLT'ing.

FSBLT4:	MOVEI T,(C)		;Transfer word count
	ADDI T,(V)		;Find addr of last dest word
	MOVEI V,1(V)		;Now get 1st dest addr,
	HRLI V,(L)		;And put 1st source addr in LH.
	BLT V,(T)		;Zoom!!
	JUMPE D,FSCPY9		;If no remainder, super win - done!
	ADDI L,(C)		;Hmm, must get last source word.
	MOVE B,(L)		;Like so.
	MOVE A,FBMSK(D)		;And a word mask for chars
	AND B,A			;Clear unused bits from source,
	ANDCAM A,1(T)		;And zap target bits in dest.
	IORM B,1(T)		;And stick last chars in.
	JRST FSCPY9		;Ok, all done...

;Can't do BLT.  Well, get A and B set up for magical shift loop.

SHSKP2:	LSH B,@SHASL(E)		;Here, only need to adjust source,
	JRST SHSKP5		;Since dest will be totally clobbered.
FSSHFT:	LSH B,@SHASL(E)		;Both src and dest must be integrated.
SHSKP1:	MOVE A,(V)		;Only need adjust dest; src wd is full.
	LSH A,@SHADR(E)
SHSKP5:	LSHC A,@SHFIX(E)	;Stuff as many chars as possible into A
	CAIE D,0		;If any remainder,
	 MOVEI C,1(C)		;Add 1 more word.
	MOVNI C,(C)		;Make AOBJN pointer.
	MOVSI C,(C)

;Now make another index for termination wrapup purposes.

	ADD D,FFINDX(E)		;New idx using # chs left in last wd.

;Now set things up for loop, and enter it.

FSCPY3:	HRLI V,(<MOVEM A,(C)>)
	MOVEM V,FSCPKL+2	;Address for MOVEM
	HRRM L,FSCPKL+4		;Address for MOVE
	MOVE L,FSHINT(E)	;Get LSH for shift-in
	MOVEM L,FSCPKL
	MOVE L,FSHOUT(E)	;And shift-out
	MOVEM L,FSCPKL+3
	JUMPGE D,FSCPKL+$STENT	;Depending on flag, enter loop at store
	SOS V,FSCPKL+2
	JRST FSCPKL+$GENT	;Or at get.

;---------------------------------------------------------------------------
;Come here when loop finished.  The last word of the source string
;will be in B.  It may have 1 to 5 chars left for moving, but will
;never have 0.

;Long wrapup.

FSCPTL:	LSHC A,@FSCPKL		;Perform a shift-in
	LSH A,1
	MOVEM A,@10		;Store full word.
	MOVEI C,1(C)		;Increment address index
;And drop through to Medium wrapup.

;Medium wrapup.

FSCPTM:	LSHC A,@FLOUT(D)	;Shift rest of source word into A
	MOVE B,@10		;Get dest word it will be stored into
	LSH B,@FLADJ(D)		;Left-adjust chars to preserve.
				;And drop thru to Short wrapup.

;Short wrapup.

FSCPTS:	LSHC A,@FFLOUT(D)	;Do final, last, shift-out.
	ANDCMI A,1
	MOVEM A,@10		;And store last dest word.

;Done!!  Just restore regs and return.

FSCPY9:	HRLI N,D+TMPACS		;Restore regs
	HRRI N,D
	BLT N,N
	RET
;Indexed by low 3 bits of P field, returns # chars
;existing to right of loc BP points to.  Hence value
;ranges from 5 to 1; if P = 01, SETZ indicates that
;bp address needs incrementing.

FSCHTB:	1			;P=10
	SETZ 5			;P=01, increment addr
	0
	0			;Randomness
	5			;P=44, full word
	4			;P=35, 4 chars to go
	3			;P=26
	2			;P=17

;This table is just like FSCHTB except values are pre-multiplied
;by 5 for easy addition into E.

FSCHT2:	1*5			;P=10
	SETZ 5*5		;P=01, increment addr
	0
	0			;Random
	5*5
	4*5
	3*5
	2*5

;This table is indexed by D when it has # chars remaining from
;dividing # chars (in C) by 5.  Provides mask for these chars.

FBMSK:	0			;Nothing here.
	BYTE (7) 177
	BYTE (7) 177, 177
	BYTE (7) 177, 177, 177
	BYTE (7) 177, 177, 177, 177

;FPATH table vectors off to BLT and other minor stuff as
;soon as all the basic computations are made.
;Indexed by E.

FPATH:	FSBLT
	FSSHFT
	FSSHFT
	FSSHFT
	SHSKP1
	FSSHFT
	FSBLT
	FSSHFT
	FSSHFT
	SHSKP1
	FSSHFT
	FSSHFT
	FSBLT
	FSSHFT
	SHSKP1
	FSSHFT
	FSSHFT
	FSSHFT
	FSBLT
	SHSKP1
	SHSKP2
	SHSKP2
	SHSKP2
	SHSKP2
	FSBLT0

DEFINE ENT (A,B,C,D,E) <
	A*7
	B*7
	C*7
	D*7
	E*7
>;DEFINE ENT

;SHASL table, contains # bits to shift first source wd left so
;as to left-adjust it in B.  Indexed by E.

SHASL:	ENT 4,3,2,1,0
	ENT 4,3,2,1,0
	ENT 4,3,2,1,0
	ENT 4,3,2,1,0
	ENT 4,3,2,1,0

;SHADR table, contains # bits to shift first dest wd right so
;as to right-adjust it in A.  Indexed by E.

DEFINE ENT1 (A,B,C,D,E) <
	0,,A*7-1
	0,,B*7-1
	0,,C*7-1
	0,,D*7-1
	0,,E*7-1
>;DEFINE ENT1

SHADR:	ENT1 -1,-1,-1,-1,-1
	ENT1 -2,-2,-2,-2,-2
	ENT1 -3,-3,-3,-3,-3
	ENT1 -4,-4,-4,-4,-4
	ENT1 -5,-5,-5,-5,-5

;SHFIX table, contains # bits to left-shift A and B combined so
;as to move as many characters out of B as possible.  Indexed
;by E.  MIN(d,e) (d and e after fschtb)

SHFIX:	ENT 1,1,1,1,1
	ENT 1,2,2,2,2
	ENT 1,2,3,3,3
	ENT 1,2,3,4,4
	ENT 1,2,3,4,5

;FSHINT table, containing appropriate LSHC instructions for shifting
;in the first chars of a fresh source word.  Indexed by E.

DEFINE ENTL (ARG1,ARG2,ARG3,ARG4,ARG5) <
	LSHC A,ARG1*7
	LSHC A,ARG2*7
	LSHC A,ARG3*7
	LSHC A,ARG4*7
	LSHC A,ARG5*7
>;DEFINE ENTL

FSHINT:	ENTL 5,4,3,2,1
	ENTL 1,5,4,3,2
	ENTL 2,1,5,4,3
	ENTL 3,2,1,5,4
	ENTL 4,3,2,1,5

;FSHOUT table, containing appropriate LSHC instructions for shifting
;out the last chars of an old source word, to make room for a
;new one.  Indexed by E.

FSHOUT:	ENTL 0,1,2,3,4
	ENTL 4,0,1,2,3
	ENTL 3,4,0,1,2
	ENTL 2,3,4,0,1
	ENTL 1,2,3,4,0

;FFINDX table, contains part of D index for fast add-in.
;Indexed by E.  Similar to FSHOUT.  Sign bit also indicates
;whether entry point is $STENT (pos) or $GENT (neg).

DEFINE ENTS (A,B,C,D,E) <
	ENT5 A
	ENT5 B
	ENT5 C
	ENT5 D
	ENT5 E
>;DEFINE ENTS
DEFINE ENT5 (X,Y) <
	X+<Y*5>
>;DEFINE ENT5
S==0B0
G==1B0

FFINDX:	ENTS (<S,0>,<S,1>,<S,2>,<S,3>,<S,4>)
	ENTS (<G,4>,<S,0>,<S,1>,<S,2>,<S,3>)
	ENTS (<G,3>,<G,4>,<S,0>,<S,1>,<S,2>)
	ENTS (<G,2>,<G,3>,<G,4>,<S,0>,<S,1>)
	ENTS (<G,1>,<G,2>,<G,3>,<G,4>,<S,0>)

;Last item (5) is actually first (0)
DEFINE ENTX (A,B,C,D,E) <
	7*E
	7*A
	7*B
	7*C
	7*D
>;DEFINE ENTX

;FENTRM table, dispatching to appropriate wrapup routine when fast AC
;loop is finished.  Indexed by D.

FENTRM:
DEFINE ENTXJ (A,B,C,D,E) <
	FSCPT'E
	FSCPT'A
	FSCPT'B
	FSCPT'C
	FSCPT'D
>;DEFINE ENTXJ
	ENTXJ M,M,M,M,S
	ENTXJ M,M,M,S,L
	ENTXJ M,M,S,L,L
	ENTXJ M,S,L,L,L
	ENTXJ S,L,L,L,L

;FLOUT table, for use by Medium wrapup routine;pushes out remaining
;source chars in B, making room for incoming dest word.
;Indexed by D.

FLOUT:	ENTX 1,2,3,4,0
	ENTX 1,2,3,0,1
	ENTX 1,2,0,1,2
	ENTX 1,0,1,2,3
	ENTX 0,1,2,3,4

;FLADJ table, also for Medium wrapup routine;adjusts dest word in
;B to left-adjust chars to be preserved.

FLADJ:	ENTX 1,2,3,4,5
	ENTX 2,3,4,5,1
	ENTX 3,4,5,1,2
	ENTX 4,5,1,2,3
	ENTX 5,1,2,3,4

;FFLOUT table, for Short wrapup routine.  Final Last shift-out of
;chars in B, so that the last dest word can be stored from A.
;Indexed by D.  Adds 1 extra bit since MOVEM A, done right after it,
;and nothing to preserve in B.

FFLOUT:
DEFINE ENTX1 (A,B,C,D,E) <
	E*7+1
	A*7+1
	B*7+1
	C*7+1
	D*7+1
>;DEFINE ENTX1

	ENTX1 4,3,2,1,5
	ENTX1 3,2,1,4,4
	ENTX1 2,1,3,4,3
	ENTX1 1,2,4,3,2
	ENTX1 1,4,3,2,1

	END