;*********************************************************************** pro xder0 ; ; Periodic x-derivatives ; 1st derivative, 6th order compact (cf. Lele, JCP 103, 16-42) ; truncation error: f' = (f')_exact + 7.9e-4*dx^6*d^7f/dx^7 ; ; alpha*[f'(i+1)+f'(i-1)] + f'(i) = a*[f(i+1)-f(i-1)] + b*[f(i+2)-f(i-2)] ; where alpha=1/3, a=14/(9*2*dx), b=1/(9*4*dx) ; ; rescale whole equation by (9/7)*dx, such that a=1: ; alpha*[f'(i+1)+f'(i-1)] + beta*f'(i) = [f(i+1)-f(i-1)] + b*[f(i+2)-f(i-2)] ; alpha=3*dx/7, beta=9*dx/7, and ; e = (1/28)*[f(i+2)-f(i-2)] + [f(i+1)-f(i-1)] ; ; Calculates the matrix elements and an LU decomposition of the problem ; We have an equation system of the form: ; ; | w2 w3 w1 | | d | | e | ; | w1 w2 w3 | | d | | e | ; | . . . | * | . | = | . | ; | . . . | | . | | . | ; | w3 w1 w2 | | d | | e | ; ; This may be solved by adding a fraction (w4) of each row to the ; following row, and a fraction (w5) to the last row. ; ; This corresponds to an LU decomposition of the problem: ; ; | w2 w3 w1 | | d | | 1 | | e | ; | w2 w3 w1 | | d | | w4 1 | | e | ; | . . . | * | . | = | w4 1 | * | . | ; | . . | | . | | . . | | . | ; | w2 | | d | | w5 w5 . w4 1 | | e | ; ; In this startup routine, we calculate the fractions w4 and w5. ; These are the same for all periodic derivatives with the same period, ; and only have to be calculated once for all ny*nz derivatives. ; ; In principle, this startup routine only has to be called once, ; but to make the xder entry selfcontained, xder0 is called from ; within xder. In practice, the xder0 time is insignificant. ; ; Update history: ; . ; 20-aug-87: coded by Ake Nordlund, Copenhagen University Observatory ; 14-sep-87: performance with alternative y-/z-loop ordering tested ; 04-nov-87: inverted w2(), to turn divides into multiplications ; 23-sep-94/db: changed from spline to 6th-order compact derivative ; ;*********************************************************************** ; common cdat,x,y,z,nx,ny,nz,nw,ntmax,date0,time0 common cxder,w1,w2,w3,w4,w5 ; ; set coefficients on lhs: (1, 3, 1)*3*dx/7 ; dx=x(1)-x(0) cx=dx*3./7. ; w1=fltarr(nx) w2=w1 w3=w1 w4=w1 w5=w1 w1(*)=cx w2(*)=3.*cx w3(*)=cx ; ; Eliminate subdiagonal elements of rows 2 to nx-2 ; for k=1,nx-3 do begin w4(k)=-w1(k)/w2(k-1) w2(k)=w2(k)+w4(k)*w3(k-1) w1(k)=w4(k)*w1(k-1) w5(k)=-w3(nx-1)/w2(k-1) w2(nx-1)=w2(nx-1)+w5(k)*w1(k-1) w3(nx-1)=w5(k)*w3(k-1) end ; ; We now have a 3*3 system on the last three rows: ; ; | w2 w3 w1 | * | . | = | . | ; | w1 w2 w3 | | . | | . | ; | w3 w1 w2 | | d | | e | ; w4(nx-2)=-w1(nx-2)/w2(nx-3) w2(nx-2)=w2(nx-2)+w4(nx-2)*w3(nx-3) w3(nx-2)=w3(nx-2)+w4(nx-2)*w1(nx-3) w5(nx-2)=-w3(nx-1)/w2(nx-3) w1(nx-1)=w1(nx-1)+w5(nx-2)*w3(nx-3) w2(nx-1)=w2(nx-1)+w5(nx-2)*w1(nx-3) ; ; Row nx ; w4(nx-1)=-w1(nx-1)/w2(nx-2) w2(nx-1)=w2(nx-1)+w4(nx-1)*w3(nx-2) ; ; Invert all the w2()'s, turns divides into mults ; w2(*)=1./w2(*) ; end ;*********************************************************************** function xder,f ; ; The following LU decomposition of the problem was calculated by the ; startup routine: ; ; | w2(1) w3(1) w1(1) | |d| | 1 | |e| ; | w2(2) w3(2) w1(2) | |d| | w4(2) 1 | |e| ; | . . . | * |.| = | w4(3) 1 | * |.| ; | . . | |.| | . . | |.| ; | w2(N) | |d| | w5(2) w5(3) . w4 1 | |e| ; ; The solutions of all ny*nz equations are carried out in parallel. ; ; The y-loops are placed inside the z-loops, to obtain a smaller stride ; (less paging). This is important for 3D-cases with large my*mz. ; If the z-loops are placed inside the y-loops, one achieves better ; performance in 2D cases (where ny=1), but this is of less importance. ; ; Update history: ; . ; 20-aug-87: coded by Ake Nordlund, Copenhagen University Observatory ; 14-sep-87: performance with alternative y-/z-loop ordering tested ; 04-nov-87: inverted w2(), to turn divides into multiplications ; 25-feb-88: removed use of swapped ff(), speeded things up 10 % ; 04-mar-88: padded blank common with one chunk, for curl, curl2, nodiv ; 07-mar-88: coalesced m,n loops -> mn loop ; 12-apr-88: removed blank common padding for hd (no curl, curl2) ; 23-sep-94/db: changed spline to 6th-order compact derivative ; ;*********************************************************************** ; common cdat,x,y,z,nx,ny,nz,nw,ntmax,date0,time0 common cxder,w1,w2,w3,w4,w5 ; ; NOTE! This version uses mw words of blank common. Beware of cases ; where the calling routine also uses blank common. ; div() does, but only after xder() has been called, so it is OK. ; curl uses mw words, and curl2 use 2*mw words blank common ; ; Check if we have a degenerate case (no x-extension) ; d=f if nx eq 1 then begin d(*,*,*)=0.0 return,d endif ; ; Do LU decomposition (this is 1-D only) ; ; Right hand sides, e = [f(i+1)-f(i-1)] + (1/28)*[f(i+2)-f(i-2)] ; xder0 c=1./28. nx1=nx-1 & nx2=nx-2 & nx3=nx-3 & nx4=nx-4 & nx5=nx-5 d(0,*,*)=(f(1,*,*)-f(nx1,*,*))+c*(f(2,*,*)-f(nx2,*,*)) d(1,*,*)=(f(2,*,*)-f(0,*,*))+c*(f(3,*,*)-f(nx1,*,*)) d(2:nx3,*,*)=(f(3:nx2,*,*)-f(1:nx4,*,*)) $ +c*(f(4:nx1,*,*)-f(0:nx5,*,*)) d(nx2,*,*)=(f(nx1,*,*)-f(nx3,*,*))+c*(f(0,*,*)-f(nx4,*,*)) d(nx1,*,*)=(f(0,*,*)-f(nx2,*,*))+c*(f(1,*,*)-f(nx3,*,*)) ; ; Do the forward substitution [2m+2a = 4 flop] ; for k=1,nx-2 do begin d(k,*,*)=d(k,*,*)+w4(k)*d(k-1,*,*) d(nx-1,*,*)=d(nx-1,*,*)+w5(k)*d(k-1,*,*) end d(nx-1,*,*)=d(nx-1,*,*)+w4(nx-1)*d(nx-2,*,*) ; ; Do the backward substitution [3m+2s = 5 flop] ; d(nx-1,*,*)=d(nx-1,*,*)*w2(nx-1) d(nx-2,*,*)=(d(nx-2,*,*)-w3(nx-2)*d(nx-1,*,*))*w2(nx-2) for k=nx-3,0,-1 do begin d(k,*,*)=(d(k,*,*)-w3(k)*d(k+1,*,*)-w1(k)*d(nx-1,*,*))*w2(k) end ; return,d end