/* YEPuDUPE - Eliminate duplicate URLs from Yep URL Log
 
 - Please edit the UrlLog line below to reflect the path/filename of your log,
   or specify the filename on the command line, e.g. YEPuDUPE c:\path\url.log

 - *new* this program updates the log file specified automatically now.
   No redirection, copying, or any other voodoo needed.
   
*/

/* ---- user settings --------------------------------------------------- */
UrlLog       = 'd:\tm\url.log'     /* log to clean; a command-line argument overrides it */
UrlLogBackUp = 'd:\tm\url.bak'     /* change to 'nul' for no backup */

/* ---- internal state: leave alone unless you know what you are doing -- */
Records        = 0      /* records started so far                        */
UrlNum         = 0      /* number of distinct URL: lines kept in URLS.   */
URLS.          = ''     /* URLS.1 .. URLS.UrlNum hold those lines        */
RecordLines    = 0      /* lines buffered for the record being read      */
BetweenRecords = 1      /* 1 while waiting for the next record to start  */


/* A file name given on the command line takes precedence over the default. */
parse arg ln
if ln \= '' then
    UrlLog = ln

/* Open the log for reading; stop right away when that is not possible. */
if stream(UrlLog,'c','open read') \= 'READY:' then do
    say 'Can not open URL Log "'UrlLog'" to read.'
    exit
end

say 'Eliminating duplicate URLs from 'UrlLog

/*
 * Read the log one record at a time.  A record is a run of non-blank lines
 * terminated by an empty line or by end of file.  The lines of the current
 * record are buffered in Arecord.; when the record ends they are appended
 * to the output buffer OL. followed by one empty separator line, unless a
 * 'URL:' line in the record had already been seen in an earlier record.
 */
OutLines  = 0       /* number of lines queued in OL.                    */
OL.       = ''      /* output buffer, written back once reading is done */
Seen.     = 0       /* Seen.key is 1 once that URL: line has been kept  */
Duplicate = 0       /* defined up front so it is never read unset       */

do forever
    /* End of input is treated as one final empty line, so a last record
       that has no empty line after it is still flushed, not dropped. */
    AtEof = (lines(UrlLog) = 0)
    if AtEof then ln = ''
    else ln = linein(UrlLog)

    if ln \= '' then do
        /* first non-blank line after a separator starts a new record */
        if BetweenRecords = 1 then do
            BetweenRecords = 0
            RecordLines = 0
            Duplicate = 0
            Records = Records + 1
        end

        /* once a record is known to be a duplicate its lines are skipped */
        if Duplicate = 0 then do
            RecordLines = RecordLines + 1
            Arecord.RecordLines = ln
            if SubStr(ln,1,4) == 'URL:' then do
                /* Stem lookup keyed by the URL line replaces a scan over
                   every URL seen so far.  strip() keeps the match
                   insensitive to surrounding blanks, as '=' was. */
                UrlKey = strip(ln)
                if Seen.UrlKey then Duplicate = 1
                else do
                    Seen.UrlKey = 1
                    UrlNum = UrlNum + 1
                    URLS.UrlNum = ln
                end
            end
        end
    end
    /* Only a truly empty line (strict ==) ends a record; a line made up
       of blanks alone is ignored altogether. */
    else if (ln == '') & (BetweenRecords = 0) then do
        BetweenRecords = 1
        if Duplicate = 0 then do
            do x = 1 to RecordLines
                OutLines = OutLines + 1
                OL.OutLines = Arecord.x
            end
            OutLines = OutLines + 1
            OL.OutLines = ''
        end
    end

    if AtEof then leave
end
ret = stream(UrlLog,'c','close') /* close UrlLog file */

/* backup, delete, and output
 *
 * File names are quoted in the host commands so paths containing blanks
 * work.  The original log is only deleted after the backup copy succeeded;
 * a backup target of 'nul' means no backup was requested, in which case no
 * copy is attempted and there is nothing to restore from later.
 */
WantBackup = (translate(strip(UrlLogBackUp)) \== 'NUL')

if WantBackup then do
    '@copy "'||UrlLog||'" "'||UrlLogBackUp||'" >nul 2>nul '
    if rc \= 0 then do
        /* never remove the only copy of the log */
        say 'Warning: could not back up '||UrlLog||' to '||UrlLogBackUp||'! Log left unchanged.'
        exit 1
    end
end

/* removed first so the rewrite starts from an empty file */
'@del "'||UrlLog||'" >nul 2>nul '

ret = stream(UrlLog,'c','open write');
if ret = 'READY:' then do
    do x = 1 to OutLines
        ret = lineout(UrlLog,OL.x)
    end
    ret = lineout(UrlLog)   /* close the log so everything written is flushed */
end
else do
    if WantBackup then do
        say 'Warning: could not update '||UrlLog||'! Restoring backup.'
        '@copy "'||UrlLogBackUp||'" "'||UrlLog||'" >nul 2>nul '
        if rc = 0 then say 'Backup restored successfully.'
        else say UrlLogBackUp||' could not be restored! You better look into it.'
    end
    else say 'Warning: could not update '||UrlLog||'! No backup was made, nothing to restore.'
end

/* Dupes are derived as records minus distinct URL: lines, so a record
   without any URL: line is counted as a dupe as well. */
DupeCount = Records - UrlNum
say 'Done processing '||Records||' URLs ('||UrlNum||' unique, '||DupeCount||' dupes)'

