/*  26 April 1999. Timur Kazimrov (timur@r1.sax.inkom.ru) 
    28 Dec 2002 modified for SREhttp/2 by Daniel Hellerstein (danielh@crosslink.net)

    Pre-reply procedure that strips CRLFs, unused spaces, and comments
    from outgoing HTML documents, for selected clients. 
    This can be useful for large HTML documents in low bandwidth situations.

    In addition, you can GZIP the contents after stripping out unneeded
    stuff! And, GZIP even if it's not HTML!

    You can also run it in stand-alone mode (possibly even under non-os/2 rexx)

Setup:
 1) Edit the ClientsToPack. parameters 
    a) set ClientsToPack.0= # of entries
    b) set ClientsToPack.n= a numeric IP address (n=1,..,ClientsToPack.0).
       You can use * as wildcards.
       For example:   if the client's IP address is 199.122.33.1, then
       the following will match:
                ClientsToPack.3='199.122.33.1'
                ClientsToPack.3='199.122.*'
                ClientsToPack.3='199.12*'
 3) Modify the other user-changeable parameters -- pay special attention to  
    bytes_second and maxdocsize

 4) Assign PACKBACK as the default, or selector specific, pre-response
    procedure  (see CVT_PAGE.TXT for the details)


Notes:
  *  If you are using more then one pre-response procedure, you'll have to create
     your own "pre-response" manager that calls each pre-response procedure in the
     proper sequence.

  * If a GZIP argument is sent (when the pre-response procedure is called),
    then a content-encoding: GZIP request header is looked for. If found,
    a GZIP content-encoding is applied to the output AFTER stuff is stripped out,
    or to the content as-is if it's NOT html.
    
*/



/*   ---------- Begin   USER CHANGEABLE PARAMETERS   ------------------ */

/* If client's address is presented here the HTML compressing
   will be performed -- note that * wildcards are allowed
   If ClientsToPack.0=0, then this procedure does nothing. */

CLIENTSTOPACK.0=1

CLIENTSTOPACK.1='*' 

/* If the source document is larger than MAXDOCSIZE the compression
   won't be performed. This parameter should be tuned. Size is
   in bytes.
   Note: large files can take a minute or longer to process, which can cause
         an "inactive-timeout". To avoid this,  increase the options-limit-
         end_client_after_inactive SRE2003 parameter
*/


MAXDOCSIZE=1500000  /* All documents larger that 1,500,000 bytes won't be compressed */

/* If the source document is smaller than MINDOCSIZE the compressing
   won't be performed (though they still may be gzipped).
   This parameter should be tuned. Size is  in bytes. */

MINDOCSIZE=2000  /* All documents smaller that 2000 bytes won't be compressed
                    (though they still may be GZIPPED) */


/* This is used to set the LimitTimeInactive SRE2003 setting, so as
   to avoid timeout problems due to slow processing times.

   The value should be the expected processing power of this 
   procedure on your server's hardware, in  bytes per second.  

   Hence, if bytes_second=5000 and the size of a file is 150,000,
   then LimitTimemInactive will be set to 30 seconds (for this
   request).

   To disable this check, set bytes_second=0
*/
bytes_second=10000

/*   ----------  End  USER CHANGEABLE PARAMETERS   ------------------ */

sreh2_packback:

parse arg contents,fileflag,mimetype,astring,req_string,seluse2,privset,hname,idinfo

isalone=0
if fileflag='' then do          /* stand alone mode */
  call init_standalone
  isalone=1
  aa=stuff ; drop stuff
  signal doalone
end 


if pos('TEXT/HTML', translate(mimetype))=0 then do  /* no clean out, buy maybe gzip */

   if pos('GZIP',translate(astring))=0 then return ''

/* check client's capabilities */

   oo=sre_reqfield('Accept-Encoding',,idinfo)
   oo=translate(translate(oo,',',' '))
   if wordpos('GZIP',oo)=0 then return ''

/* else, gzip it */
   if fileflag=1 then do
     jlen=stream(contents,'c','query size')
     foo=stream(contents,'c','open read')
     aa=charin(contents,1,jlen)
     foo=stream(contents,'c','close')
   end
   else do
     aa=contents
   end

   gzip_string=rxgzDeflateString(aa)
   if gzip_string='' then return ' '  /* give up, some kind of error */
   foo=sre_command('Header add Content-Encoding: gzip ',,idinfo)
   return '1 '||'0d0a'x||gzip_string
end


/* if here, is html */

if CLIENTSTOPACK.0=0 then
  return 0

clientfound=0
caddr=sre_extract('CLIENTADDR',idinfo)

do mm=1 to clientstopack.0
  oo=sre_wild_match(caddr,clientstopack.mm)
  if oo=0 then iterate          /* not a match */
  clientfound=1
  leave
end

if clientfound=0 then return '' /* don't do this client */

if fileflag=1 then do
   jlen=stream(contents,'c','query size')
   foo=stream(contents,'c','open read')
   aa=charin(contents,1,jlen)
   foo=stream(contents,'c','close')
end
else do
   jlen=length(contents)
   aa=contents
end

if (jlen > MAXDOCSIZE) then return ''
if (jlen < MINDOCSIZE)  then return ''

if bytes_second>0 & jlen>(bytes_second*3) then do  /* don't bother checking if relatively small document */
   needed=trunc(jlen/bytes_second)
   goo=sre_extract('LimitTimeInactive',idinfo)
   if goo<needed then hoo=sre_command('SET LimitTimeInactive ',needed,idinfo)
end


doalone: ;

aa=htmlpack(aa,isalone)

if isalone=1 then do    /* write file and exit */
   foo=charout(outfile,aa,1)
   if foo>0 then
        say "Problem writing: "outfile
   else
        say outfile "written succesfully"
  exit
end
else do

   if aa='' then return ''

/* check for GZIP encoding? */
   if pos('GZIP',translate(astring))=0 then 
      return '1 '||'0d0a'x||aa

/* check client's capabilities */

   oo=sre_reqfield('Accept-Encoding',,idinfo)
   oo=translate(translate(oo,',',' '))
   if wordpos('GZIP',oo)=0 then                 /* send as is */
      return '1 '||'0d0a'x||aa
   gzip_string=rxgzDeflateString(aa)
   if gzip_string='' then return ' '  /* give up, some kind of error */
  
   foo=sre_command('Header add Content-Encoding: gzip ',,idinfo)

   return '1 '||'0d0a'x||gzip_string
end

/*************************/
htmlpack:procedure expose broken ver
parse arg stuff,isalone

ssi_list='REPLACE # % INCLUDE $ SELECT INTERPRET OPTIONS CACHE'

broken=0

ssi_list.0=0
if ssi_list<>'' then do
  do ioo=1 to words(ssi_list)
    ssi_list.ioo=translate(strip(word(ssi_list,ioo)))
  end
  ssi_list.0=words(ssi_list)
end


/* parse the file, removing crls and spaces where possible */
/* but -- pull out PRE SCRIPT and APPLET chunks */

/* a no longer used shortcut
tstuff=translate(stuff)
i1=pos('PRE',tstuff); i2=pos('SCRIPT',tstuff) ; i3=pos('APPLET',tstuff)
if i1+i2+i3=0 then do
   newstuff=space(translate(stuff,' ','00090d0a'x),1)
    signal alldone    
end */

newstuff=''
iwas=0
do forever
    if isalone=1 then do
        if length(newstuff)-iwas > 10000 then do
                say " Processed "length(newstuff)
                iwas=length(newstuff)
        end
    end 
    if stuff='' | stuff=' ' then leave
    parse var stuff a1z '<' a2 '>' stuff
    a1z=translate(a1z,' ','0d0a0009'x)
    a1=strip(space(a1z,1))
    if a1<>'' then do
      if left(a1z,1)=' ' then a1=' '||a1  /* space inappropriately strips! */
      if right(a1z,1)=' ' then a1=a1||' '
    end
    newstuff=newstuff||a1
    if a2="" & stuff="" then  leave

/* what kind of element? */
   parse var a2 a2a a2b a2bb ; a2a=strip(translate(a2a))

/* get rid of comments? */
    a2b=translate(strip(a2b))
    if a2a='!--' then do                /* a comment -- remove it (if not an ssi)? */
        iscomment=1
        parse var a2b a2c . 
        do mm=1 to ssi_list.0
           if abbrev(ssi_list.mm,a2b)=1 then do
               iscomment=0 ; leave
           end /* do */
        end /* do */
       if iscomment=1 then iterate
    end /* do */

/* is a2a=PRE, SCRIPT or APPLET? If so, read as is (until /a2a) */
    if wordpos(a2a,'PRE SCRIPT APPLET')=0 then do
         a2=space(translate(a2,' ','0d0a0009'x),1)
         newstuff=newstuff||'<'||a2||'>'
         iterate
    end /* do */
/* add to newstuff until /a2a */
    newstuff=newstuff||'<'||a2||'>'
    endat='/'||a2a
    do forever                  /* use as is */
       if stuff='' | stuff=' ' then leave
       parse var stuff b1 '<' b2 '>' stuff
       newstuff=newstuff||b1||'<'||b2||'>'
       parse var b2 b2a . ; b2a=strip(translate(b2a))
       if b2a=endat then leave
    end /* do */
end                     /* stuff parsing */   


alldone:

return newstuff         /* might be full doc */



/****************************************/
/* initializations; when run as a standalone program */
init_standalone:

opsys=''

do forever
 if queued()=0 then leave
 pull foo
end
parse upper version oof

if opsys=''  then do
  if pos('REXXSAA',oof)>0 then
    '@VER | rxqueue'
  else
    'VER | rxqueue'

  do forever
    if queued()=0 then leave
    pull oot
    if oot<>'' then otype=oot
  end
  select
    when pos('95',otype)>0 then opsys='WIN95'
    when pos('OPERATING SYSTEM/2',otype)>0 then opsys='OS/2'
    when pos('DOS',otype>0) then opsys='DOS'
    when pos('NT',otype)>0 then opsys='NT'
    otherwise opsys=''
  end
end

parse version fooa aa
say "You are using REXX version: "fooa" (under "opsys ')'


foo2=1
foo=rxfuncquery('sysloadfuncs')
if foo=1 then do
  foo2=RxFuncAdd('SysLoadFuncs', 'RexxUtil', 'SysLoadFuncs')
  if foo2=0 then call SysLoadFuncs
  foo=rxfuncquery('sysloadfuncs')
end
if foo=0 then  do
   got_rexxutil=1
end
else do
  say "Note: RexxUtil is NOT installed, so some options will not be available."
  got_rexxutil=0
end

if opsys='OS/2' then  cansi=checkansi()

if  cansi=1 then do
  aesc='1B'x
  cy_ye=aesc||'[37;46;m'
  normal=aesc||'[0;m'
  bold=aesc||'[1;m'
  re_wh=aesc||'[31;47;m'
  reverse=aesc||'[7;m'
end
else do
  say "Warning: ANSI not available, output will be simpler."
end


infile='';outfile=''

getin:
if infile="" then do
    call lineout,bold " Enter name of HTML file (?DIR for a directory, EXIT to quit) "normal
    call charout,"  "reverse " :" normal
    pull infile ; infile=strip(infile)
end

if strip(translate(infile))='EXIT' then do
   if addonmode<>1 then say "bye "
   exit
end /* do */


if abbrev(translate(infile),'?DIR')=1 then do
    parse var infile . thisdir

    if thisdir="" then do
      if opsys='OS/2'then
        thisdir=strip(directory(),'t','\')||'\*.*'
     else
       thisdir='.\*.*'
    end
    say
    say reverse ' List of files in: ' normal bold thisdir normal
    do while queued()>0
            pull .
    end /* do */
    toget=thisdir
    select
       when  opsys='OS/2' then '@DIR /b  '||toget||' | rxqueue'
       when  opsys='LINUX' then 'ls '||toget||' | rxqueue'
       otherwise 'DIR /b  '||toget||' | rxqueue'
    end
    foo=show_dir_queue('*')
    say
    infile=''
    signal getin
end

if abbrev(translate(strip(infile)),'/DIR')=1 then do
    infile=substr(strip(infile),2)
    address cmd infile
    infile=''
    signal getin
end /* do */


/* maybe it's actually a file name */

infile=strip(infile)
infile0=infile
if pos('.',infile)=0 then infile=infile||'.HTM'
htmlfile=stream(infile,'c','query exists')              /* does it, or .html or .htm version of it, exist*/
if htmlfile='' & pos('.',infile0)=0 then htmlfile=stream(infile0,'c','query exists')
if htmlfile='' then htmlfile=stream(infile0||'.HTML','c','query exists')


if htmlfile='' then do
    Say "Sorry. could not find: " infile
    exit
end /* do */

htmllen=stream(htmlfile,'c','query size')
if htmllen=0 then do
   say " Sorry -- " htmlfile " is empty "
   infile=''
   signal getin
end /* do */
stuff=charin(htmlfile,1,htmllen)
Say "Reading " HTMLlen " characters from " htmlfile

outget: nop
if outfile='' then do
   parse var htmlfile tout '.' .
   tout=tout||'.TXT'
   say " "
   say bold " Enter name of output file (ENTER="tout")"normal
   call charout,"  "reverse " :" normal
   parse pull outfile
   if outfile='' then outfile=tout
end /* do */

foo=stream(outfile,'c','query size')
if foo='' then foo=0

signal off syntax ; signal off error
signal on syntax name hoy1 ; signal on error name hoy1
if foo<>0 then do
     if forceout=0 then do
        arf=yesno(normal"     "bold"Overwrite")
        if arf=0 then do
            outfile='' ; signal outget
        end /* do */
     end                /* else, command line mode implies overrwrite */
     else do
          say "     Overwriting "foo
     end /* do */
     if got_rexxutil=1 then do
         foo=sysfiledelete(outfile)
     end
     else do
        say "     .... trying to delete "outfile
        if opsys="LINUX" then
           'rm 'outfile
        else
           'DEL 'outfile
        foo=0
     end
     foo=stream(outfile,'c','query exists')
     if foo<>'' then do
            say "      Could not delete "outfile
            outfile=''
            signal outget
     end /* do */
end /* do */

return 0                /* outfile and stuff are set */

hoy1:
outfile=' '
say " % " sigl " : " rc
say "File exists. Try another name"
signal off syntax ; signal off error
signal outget



 /* ------------------------------------------------------------------ */
 /* function: Check if ANSI is activated                               */
 /*                                                                    */
 /* call:     CheckAnsi                                                */
 /*                                                                    */
 /* where:    -                                                        */
 /*                                                                    */
 /* returns:  1 - ANSI support detected                                */
 /*           0 - no ANSI support available                            */
 /*          -1 - error detecting ansi                                 */
 /*                                                                    */
 /* note:     Tested with the German and the US version of OS/2 3.0    */
 /*                                                                    */
 /*                                                                    */
 CheckAnsi: PROCEDURE
   thisRC = -1

   trace off
                         /* install a local error handler              */
   SIGNAL ON ERROR Name InitAnsiEnd

   "@ANSI 2>NUL | rxqueue 2>NUL"

   thisRC = 0

   do while queued() <> 0
     queueLine = lineIN( "QUEUE:" )
     if pos( " on.", queueLine ) <> 0 | ,                       /* USA */
        pos( " (ON).", queueLine ) <> 0 then                    /* GER */
       thisRC = 1
   end /* do while queued() <> 0 */

 InitAnsiEnd:
 signal off error
 RETURN thisRC



/*********/
/* show stuff in queue as a list */
show_dir_queue:procedure expose qlist.
parse arg lookfor
    ibs=0 ;mxlen=0
    if lookfor<>1 then
       nq=queued()
     else
        nq=qlist.0
    do ii=1 to nq
       if lookfor=1 then do
          aa=qlist.ii
          ii2=lastpos('\',aa) ; anam=substr(aa,ii2+1)
       end /* do */
       else do
          pull aa
          if pos(lookfor,aa)=0 & lookfor<>'*' then iterate
          parse var aa anam (lookfor) .
          if strip(anam)='.' | strip(anam)='..' then iterate
       end
       ibs=ibs+1
       blist.ibs=anam
       mxlen=max(length(anam),mxlen)
    end /* do */
arf=""
do il=1 to ibs
   anam=blist.il
   arf=arf||left(anam,mxlen+2)
   if length(arf)+mxlen+2>75  then do
        say arf
        arf=""
   end /* do */
end /* do */
if length(arf)>1 then say arf
say
return 1


