#!/usr/local/bin/klone

(setq args (getopts
    "ftp-list-news [options] site
lists new files in the anonymous ftp site \"site\"
if site not given, taken in env var SITE_HOST"
    ("-new" () site:new
      "create a new script to monitor a new site"
    )
    ("-dir" directory site:dir
      "the directory to list on the site, defaults to /"
    )
    ("-name" descriptive-name site:name
      "the name of the site to be used in mails and 
saved files. defaults to the env variavle SITE_NAME or site name"
    )
    ("-ls-lR" filename site:index
      "the name of the index files on the remote host if 
present. If not, the ls -lR will be done by the script"
    )
    ("-lsp" postprocessor site:decomp
      "the unix command to apply on index file after 
getting it, where %0 means the file name
e.g.: \"mv %0 %0.gz;gzip -d %0\""
    )
    ("-nols" () site:nols
      "give this flag if the index is not in ls -lR format"
    )
    ("-cache" directory (dircache "/net/koala/ftpcaches")
      "the place to store locally (cache) previous contents
of the site. Defaults to \"/net/koala/ftpcaches\""
    )
    ("-password" mail-adress site:password
      "the password to use to connect. defaults to user@"
    )
    ("-x" regexp site:excludeds
      "exclude files whose full pathname match regexp from listing
e.g.: \"^(.*[/])?ls-lt?R([.]Z|[.]gz)$\" to exclude all ls-l(t)R files"
      :multiple t
    )
    ("-X" pathname site:excludeds-exact
      "exclude files whose full pathname name is exactly equal to 
pathname from listing. e.g.: \"FILES\", \"pub/INDEX\""
      :multiple t
    )
    ("-Xf" pathname site:excludeds-exact-filename
      "exclude files whose name is equal is exactly equal to 
pathname in any directory "
      :multiple t
    )
    ("-readmes" file site:readmes
     "do not get list of new files, only get readmes
listed in file")
    ("-noreadmes" file site:noreadmes
     "only get list of new files, no readmes"
    )
    ("-all" () do-all "scans cache dir for all sites and query them")
    ("-v" () verbose "outputs some information, otherwise quite silent")
    ("-debug" () debug "debugging mode, very verbose")
))

(setq error-in-arguments "\n**************************************************
ftp-list-news must have one argument: the site name
type: \"ftp-list-news -?\" for help
**************************************************")

(if site:new 
  (with (*current-directory* dircache)
    (? "Please enter site internet address (or local directory): ")
    (setq host (read-line))
    (? "Please enter a name for this site: ")
    (setq name (read-line))
    (if (not (match "^[-a-zA-Z0-9_.]+$" name))
      (error "sorry, invalid file name: %0" name)
    )
    (if (file-stats (+ name ".ftp"))
      (error "sorry, file %0 already exists!" (+ name ".ftp"))
    )
    (with (fd (open (+ name ".ftp") :direction :output))
      (print-format fd "#!/bin/sh
export SITE_NAME; SITE_NAME=%0
export SITE_HOST; SITE_HOST=%1
export SITE_DIR;  SITE_DIR=/
ftp-list-news \\
    -X FILES -x \"^(.*[/])?ls-lt?R([.]Z|[.]gz)$\" \\
2>&1 | mail -s \"Listing of $SITE_NAME ($SITE_HOST:$SITE_DIR) at `date`\" \\
\\\n"
	name host)
    )
    (print-format "Ok, edit file %0/%1.ftp to append your mail address and tweak the options\n"
      dircache name
    )
    (sh chmod a+rwx ,(+ name ".ftp"))
    (exit 0)
))

(if (not (= 1 (length args)))
  (if do-all
    (setq site "")
    (if (not (setq site (getenv "SITE_HOST")))
      (error error-in-arguments)
    )
  )
  (setq site (getn args 0))
)

(if (and (not site:name) (not (setq site:name (getenv "SITE_NAME"))))
  (setq site:name site)
)
(if (and (not site:dir) (not (setq site:dir (getenv "SITE_DIR"))))
  (setq site:dir "/")
)

(if (not site:password) (setq site:password (+ (getenv "USER") "@")))

;;=============================================================================
;;                    end of option parsing
;;=============================================================================

(setq localindex (+ site:name ".llr"))
(setq localindex.old (+ localindex ".old"))
(setq *current-directory* dircache)
(setq readme-prefix (+ "___" (String *current-process-id*) "___"))

(if site:excludeds-exact-filename (progn
    (if (not site:excludeds) (setq site:excludeds (copy site:excludeds)))
    (dolist (excluded-exact site:excludeds-exact-filename)
      (lappend site:excludeds (+ "^(.*[/])?"
	  (quote-string-for-regexp excluded-exact) "$")
      )
)))
(if site:excludeds-exact (progn
    (if (not site:excludeds) (setq site:excludeds (copy site:excludeds)))
    (dolist (excluded-exact site:excludeds-exact)
      (lappend site:excludeds (+ "^"
	  (quote-string-for-regexp excluded-exact) "$")
      )
)))
(dotimes (i (length site:excludeds))	;compile regexps
  (put site:excludeds i (regcomp (get site:excludeds i "^$")))
)
(setq re-line-to-name (regcomp " ([^ ]+)$"))

(defun main (&aux fs)
  (if do-all (only-do-all))
  (if site:readmes (only-list-readmes))
  (if (file-stats localindex) 
    (sh mv ,localindex ,localindex.old)
  )
  (if (= #\/ (getn site 0)) 
    (with (*current-directory* site) 
      (sh rm -f ,(+ dircache "/" localindex))
      (wait (system (list "ls" "-lR") :output 
	  (+ dircache "/" localindex) :error "/dev/null"))
    )
    (doftp site site:password site:dir 
      (if site:index (+ "get " site:index " " localindex)
	(+ "ls -lR " localindex)
  )))
  (if (and site:decomp (file-stats localindex))
    (with (command (print-format String site:decomp localindex))
      (wait (system command))
  ))
  (if debug (sh ls -l ,localindex))
  (if (and 
      (setq fs (file-stats localindex))
      (/= 0 (get fs 'size))
      (or (site:nols) (normalize-ls-lR localindex) t)
    )
    (look-for-news site site:password site:dir localindex localindex.old)
    (progn
      (if verbose
	(print-format "ftp-list-news: could not get index file for %0 (%1)\n"
	  site:name site
      ))
      (if (file-stats localindex.old) 
	(sh mv ,localindex.old ,localindex)
	(sh rm -f ,localindex)
  )))
  (sh rm -f ,localindex.old)
)

(defun look-for-news (site site:password site:dir localindex localindex.old 
    &aux
    (fd (open localindex :error ()))
    (fdold (open localindex.old :error ()))
    (oldlines (Hashtable ()))
    (lines (list))
    line
  )
  (if fdold
    (while (setq line (read-line fdold ()))
      (put oldlines line t)
  ))
  (if fd
    (if site:excludeds
      (while (setq line (read-line fd ()))
	(if (getn oldlines line) ()
	  (catch 'Excluded
	    (dolist (re-excluded site:excludeds)
	      (if (regexec re-excluded (match re-line-to-name line 1))
		(throw 'Excluded t)
	    ))
	    (lappend lines line)
      )))
      (while (setq line (read-line fd ()))
	(if (getn oldlines line) ()
	  (lappend lines line)		;new line
      ))
  ))
  (if debug (PV "look-for-news:" lines))
  (if lines
    (print-results site site:password site:dir lines)
  )  
)

(defun only-list-readmes  (&aux (lines (list)) (fd (open site:readmes)))
  (while (setq line (read-line fd ()))
    (lappend lines line))
  (if lines
    (print-results site site:password site:dir lines)
  )
  (exit 0)
)

(setq print-results:re-readme (regcomp "([^ ]*[rR][eE][aA][dD][mM][eE])$"))
(defun print-results (site site:password site:dir lines &aux
    filename
    (readmes (list))
    (i 0)
    (commands (list))
    fd
    s
  )
  (setq s (print-format String "New files on ftp site %0 (%1:%2):"
      site:name site site:dir
  ))
  (print-format "%0\n%1\n\n" s (make-string (length s) #\=))
  (dolist (line lines)
    (if (not site:readmes) (write-line line))
    (if (regexec print-results:re-readme line)
      (lappend readmes (regsub print-results:re-readme 1))
    )
  )
  
  (if (and (not site:noreadmes) readmes) (progn
      (print-format "\nREADMES:\n========\n")
      (dolist (filename readmes)
	(lappend commands (+ "get " filename " " readme-prefix (String i)))
	(incf i)
      )
      
      
      (if (= #\/ (getn site 0)) 
	(dolist	(com commands)		;local
	  (wait (system (+ "cp " site "/" (subseq com (length "get ")))))
	)
	(doftp site site:password site:dir commands)
      )
      
      (setq i 0)
      (dolist (filename readmes)
	(print-format "\n%0 %1:\n" 
	  (make-string (- 77 (length filename)) #\*) filename
	)
	(if (setq fd (open (+ readme-prefix (String i)) :error ())) (progn
	    (while (setq l (read-line fd ()))
	      (write-line l)
	    )
	    (setq fd ())
	  )
	  (print-format "*** file %0 not found! ***\n" filename)
	)
	(incf i)
      )
      (sh rm -f ,(+ readme-prefix "*"))
)))))))))
    

(defun normalize-ls-lR (file &aux
    (fd (open file))
    (lines (list))
    (dir "")
    line
    (re-empty (regcomp 
	"^(([.]:?)|([ \t]*)|(total [0-9]+)|([^ \t]+[ \t]+unreadable))$"))
    (re-dir (regcomp "^([.][/])?([^ \t]*[^:])[:]?$"))
    (re-entry (regcomp (+
	  "[dbclps-][r-][w-][xstST-][r-][w-][xstST-][r-][w-][xstST-]" ;mode
	  " +[0-9]+"			;links
	  " +[0-9a-zA-Z_-]+"		;user
	  "( +[0-9a-zA-Z_-]+)?"		;group (optional) 1
	  " +([0-9]+)"			;size 2
	  " +([^ ]+ +[^ ]+ +[^ ]+)"	;date 3
	  " +(.+)$"			;name 4
    )))
  )
  (while (setq line (read-line fd ()))
    (if (regexec re-empty line)
      ()
      
      (regexec re-dir line)
      (setq dir (regsub re-dir 2))
      
      (regexec re-entry line)
      (if (/= #\d (getn line 0))		;omit dirs
	(lappend lines (+ (normalize-date (regsub re-entry 3))
	    " " (normalize-size (Int (regsub re-entry 2))) " " 
	    (+ dir (if (= "" dir) "" "/")  (regsub re-entry 4))
	))
      )
      
      (print-format "unrecognized entry: \"%0\"\n" line)
    )
  )
  (close fd)
  (setq fd (open file :direction :output :if-exists :supersede))
  (dolist (line lines)
    (write-line line fd)
  )
  (flush fd)
  (close fd)
)

(setq normalize-date:re-date (regcomp "([A-Z][a-z][a-z]) +([0-9]+) +(.*)$"))
(setq normalize-date:re-time (regcomp "([0-9]+):([0-9])+"))
(setq normalize-date:date (cur-date))

(defun normalize-date (date &aux res
    year month day
    (mnum '("Jan" 1 "Feb" 2 "Mar" 3 "Apr" 4 "May" 5 "Jun" 6 "Jul" 7 "Aug" 8
	"Sep" 9 "Oct" 10 "Nov" 11 "Dec" 12
    ))
  )
  (if (regexec normalize-date:re-date date) (progn
      (setq day (regsub normalize-date:re-date 2))
      (setq month (regsub normalize-date:re-date 1))
      (if (regexec normalize-date:re-time (regsub normalize-date:re-date 3))
	(setq year (String
	    (if (<= (get mnum month 0) (get normalize-date:date 1))
		(get normalize-date:date 0)
		(- (get normalize-date:date 0) 1)
	    )
	))
	(setq year (regsub normalize-date:re-date 3))
      )
      (setq res (+ month (make-string (- 3 (length day))) day " "
	  (subseq year 2)
      ))
    )
    (setq res "??? ?? ??")
  )
  res      
)

(defun normalize-size (size &aux res
    (field-width 4)
  )
  (if (= size 0)
    (setq res (String size))
    (< size 1024000) 
    (setq res (+ (String (/ (+ 1023 size) 1024)) "k"))
    (< size 1048576)
    (setq res "1M")
    t
    (setq res (+ (String (/ size 1048576)) "M" 
	(subseq (String (/ (* (+ 1048576 (mod size 1048576)) 1000) 1048576)) 1)
    ))
  )
  
  (if (< (length res) field-width)
    (setq res (+ (make-string (- field-width (length res))) res))
    (> (length res) field-width)
    (setq res (subseq res 0 field-width))
  )
  res  
)

(defun doftp (site site:password site:dir &rest commands &aux
    fd
    pid
    (temp-file (+ "/tmp/ftp-list-news.coms." site))
  )
  (setq pid (system (+ "/usr/ucb/ftp -inv 2>&1 > " temp-file) :input 'fd))
  (print-format fd "open %0\n\nuser anonymous %1\nbinary\nprompt\ncd %2\n"
    site
    site:password
    site:dir
  )
  (flush fd)
  (if debug (print-format fd "pwd\ndir\n"))
  (wait (system (list "chmod" "a+rw" temp-file)))
  (doftp:command commands fd)
  (print-format fd "quit\n")
  (flush fd)
  (wait pid)
)


(defun doftp:command (command fd)
  (if (typep command List)
    (dolist (com  command) (doftp:command com fd))
    (progn
      (if debug (PV com))
      (print-format fd "%0\n" com)
      (flush fd)
  ))
)

;; go to cache and perform a search for ALL sites listed there
(defun only-do-all (&aux 
    (pids (list))
  )
  (sh chmod a+x *.ftp "2>/dev/null")
  (dolist (file (directory))
    (if (match "[.]ftp$" file)
      (lappend pids (system (list file)))
  ))
  (wait pids :blocking t)
)

(main)

(if debug (PF "DONE\n"))

;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***
