#!/bin/bash
version=1.0

# 
# Copyright (C) 2004  Thomas Sattler (tsattler@gmx.de)
# 
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.  See the GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place - Suite 330, Boston, MA  02111-1307, USA.
# 

# Everything this script creates is private to the invoking user.
umask 077
#you probably want to change "/tmp/" below to "/var/tmp/":
# The template stays unquoted: the rest of the script expands $TMP without
# quotes, so a script name containing whitespace has to fail right here
# (mktemp rejects the extra words) instead of producing a path that breaks
# later on.
TMP=$(mktemp -d /tmp/${0##*/}.$$.XXXXXXXX) || exit

#######################################
# Remove the scratch directory unless --debug asked to keep it.
# Globals:
#   TMP   (read) scratch directory created above
#   DEBUG (read) non-empty means "keep the temporary files"
#######################################
cleanup_tmp() {
  if [ -z "$DEBUG" ] && [ -n "$TMP" ]; then
    rm -rf -- "$TMP"
  fi
}
# Runs on every way out of the script (help screen, bad argument, error,
# normal end), so early exits no longer leave empty directories in /tmp.
# Removing an already removed directory is harmless.
trap cleanup_tmp EXIT

# Placeholder substituted for blanks in path names while the data is processed
# as whitespace separated fields; it is turned back into a blank at the end.
eSPACE="-|ThiS-Is-AN-eSCApEd-SPaCe|-"
# The generated report.
DATA=$TMP/gettree.html
# Full path of the "links" browser, empty if none is found in $PATH.
LINKS=$(type -p links)
# Default for --cut-at (percent of the parent's size).
CUT_AT="0.1"
# Default for --min-kb.
minK=0

#######################################
# Print the usage summary and terminate the script.
# The options --debug, --get-links, --help and --version are accepted by the
# option parser as well but are not listed in this summary.
# Globals:
#   CUT_AT (read) shown as the default of --cut-at
# Outputs:
#   the usage text on stdout
# Returns:
#   never; ends the script with the status of the final printf
#######################################
help() {
  local prog=${0##*/}
  printf '%s\n' \
    "Syntax: $prog [dir] [none, one or more of the following options]" \
    "     dir         the directory you want do check for used space" \
    "   --cut-at      don't show files below 'float'% of his parent" \
    "                 default is $CUT_AT, use values between 0.01 and 39" \
    "   --keep-as     don't try to open results in browser, save them" \
    "   --min-kb      ignore anything below 'int'KB. This can give a" \
    "                 huge speedup on older boxes but directories that" \
    "                 content is below 'int'KB are shown as files" \
    "   --link-files  insert links to files for direct access"
  exit
}

#Options:
#######################################
# Parse the command line and record the choices in global variables.
# Globals:
#   TMP, version                              (read)
#   CUT_AT, DEBUG, SAVE, LINK_FILES, minK, CD (written)
# Arguments:
#   the script's command line ("$@")
# Outputs:
#   complaints about unusable option values go to stderr
# Returns:
#   0 when all arguments were accepted. Exits the script with status 1 on an
#   unusable --cut-at / --min-kb / --keep-as value, through help() on a
#   missing value, a second or non-existing directory or an unknown word,
#   and after doing their work for --get-links and --version.
#######################################
parse_options() {
  while [ "$1" ]; do
    case "$1" in
      --cut-at)
        # One or two integer digits (0-39), optionally followed by one or
        # two decimals; a second decimal must not be 0.
        case "$2" in
          [0-9]|[0-9].[0-9]|[0-9].[0-9][1-9]) ;;
          [1-3][0-9]|[1-3][0-9].[0-9]|[1-3][0-9].[0-9][1-9]) ;;
          *)
            echo "cutat-arg is unusefull." >&2
            exit 1
            ;;
        esac
        CUT_AT=$2
        shift
        ;;
      --debug)
        DEBUG=true
        ;;
      --get-links)
        # NOTE(review): this fetches a tarball over plain HTTP and builds and
        # installs it without any integrity check - consider HTTPS plus a
        # checksum/signature verification.
        # Never download and build inside the caller's directory.
        cd "$TMP" || exit
        LINKS_HP=http://artax.karlin.mff.cuni.cz/~mikulas/links/download/
        echo -n "getting current links versions ... "
        wget -qO- "$LINKS_HP?M=D" |
          sed '/ef="li/!d;s,.*ef="li,li,;s,".*,,' > links-versions
        echo
        # Word splitting is intended: one menu entry per listed tarball.
        select version in $(cat links-versions); do break; done
        wget -O- "$LINKS_HP$version" | tee "$version" | tar xz
        cd "${version%%.tar.gz}" || exit
        ./configure
        if [ "$(uname -o)" == "Solaris" ]; then
          # needs additional "-R/path/to/libs"
          sed '/^LIBS /s, -L\([^ ]*\) , -L\1 -R\1 ,' Makefile > Makefile.new
          mv Makefile.new Makefile
        fi
        make; strip links
        if [ -w /usr/local/bin ]; then
          DIR=/usr/local/bin
        else
          [ -d ~/bin ] || mkdir ~/bin
          DIR=~/bin
        fi
        # Repoint the "links" symlink only if the new binary was really
        # installed; otherwise a working installation would be replaced by
        # a dangling link.
        cp links "$DIR/${version%%.tar.gz}" &&
          ln -sf "${version%%.tar.gz}" "$DIR/links"
        cd /; rm -rf "$TMP"
        exit
        ;;
      --help)
        help
        ;;
      --keep-as)
        [ "$2" ] || help
        # Reject a directory before touching it, so a refused argument is
        # left unmodified.
        if [ -d "$2" ]; then
          echo "\"$2\" is a directory." >&2
          exit 1
        fi
        touch "$2" || exit
        # Store an absolute path: the working directory changes later on.
        if [ "${2:0:1}" == "/" ]; then
          SAVE="$2"
        else
          SAVE="$PWD/$2"
        fi
        shift
        ;;
      --link-files)
        LINK_FILES=true
        ;;
      --min-kb)
        # A positive integer of at most six digits without leading zero.
        case "$2" in
          [1-9]|[1-9][0-9]|[1-9][0-9][0-9]) ;;                #KB
          [1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;  #MB
          [1-9][0-9][0-9][0-9][0-9][0-9]) ;;
          *)
            echo "mink-arg is unusefull." >&2
            exit 1
            ;;
        esac
        minK=$2
        shift
        ;;
      --version)
        echo "${0##*/} v$version"
        exit
        ;;
      *)
        # Anything else has to be the (single) directory to examine.
        [ "$CD" ] && help
        [ -d "$1" ] || help
        # Enter the directory and come straight back: the target is then
        # remembered in OLDPWD, while relative --keep-as names that follow
        # are still resolved against the caller's directory.
        # ("cd -" prints the directory it switches to.)
        cd "$1" || exit
        CD=true
        cd -
        ;;
    esac
    shift
  done
}

parse_options "$@"

# Now really change into the directory remembered in OLDPWD by the parser.
[ "$CD" ] && cd -

# Phase 1: let du list every file and directory below the current directory
# with its size in KB. Error messages of du are discarded. The elapsed time
# is reported and added to the running total in SUM.
SECONDS=0
printf 'proceeding "%s", please wait ... ' "$PWD"
du -ak 2> /dev/null > "$TMP/du"
printf 'done (%ss)\n' "$SECONDS"
SUM=$((SUM + SECONDS))

# Phase 2: replace every blank by the $eSPACE placeholder (so a path is a
# single whitespace separated field from here on) and keep only the entries
# bigger than $minK KB. The number of dropped entries is written to
# $TMP/du-pf-cut for the final report.
SECONDS=0
printf '%s' "prefiltering data:  "
gawk -v token="$eSPACE" -v limit="$minK" -v cutfile="$TMP/du-pf-cut" '
  { gsub(/ /, token) }
  $1 > limit { print; next }
  { dropped++ }
  END { print dropped > cutfile }
' "$TMP/du" > "$TMP/du-pf"
SUM=$((SUM + SECONDS))
printf '%4s\n' "${SECONDS}s"

# Phase 3: order the surviving entries by size, biggest first. The next phase
# looks up the size of an entry's parent directory, so the parent has to be
# read before its children.
SECONDS=0
printf '%s' "sorting by size:    "
sort -k 1,1nr "$TMP/du-pf" > "$TMP/du-pf-ss"
SUM=$((SUM + SECONDS))
printf '%4s\n' "${SECONDS}s"

# Phase 4: put the size and the path of the parent directory in front of every
# entry. Output line: "<parent size> <parent path> <size>\t<path>".
# Entries without a "/" in their path (the top directory itself) are dropped.
SECONDS=0
printf '%s' "prefixing lines:    "
gawk '
  # Position of the last "/" in s, 0 if there is none.
  function last_slash(s,    pos, step) {
    pos = 0
    while ((step = index(substr(s, pos + 1), "/")) > 0)
      pos += step
    return pos
  }
  {
    # The path is everything behind the first TAB of the du line.
    path = substr($0, index($0, "\t") + 1)
    kb[path] = $1
    slash = last_slash(path)
    if (slash == 0)
      next
    parent = substr(path, 1, slash - 1)
    print kb[parent], parent, $0
  }
' "$TMP/du-pf-ss" > "$TMP/du-pf-ss-pp"
SUM=$((SUM + SECONDS))
printf '%4s\n' "${SECONDS}s"

# Phase 5: group the entries by parent directory (parent size ascending, then
# parent path) and order them by their own size inside each group. The order
# is ascending because the finished report is reversed line by line at the
# very end.
SECONDS=0
printf '%s' "sorting again:      "
sort -k 1,1n -k 2,2 -k 3,3n "$TMP/du-pf-ss-pp" > "$TMP/du-pf-ss-pp-oo"
SUM=$((SUM + SECONDS))
printf '%4s\n' "${SECONDS}s"

#######################################
# Phase 6: turn the grouped listing into the HTML report.
# The awk program emits the report bottom-up (closing tags first, the header
# line last); tac reverses it afterwards. In between, sed turns the $eSPACE
# placeholders back into blanks.
# File names, the working directory and the host name are HTML-escaped before
# they are written, and the latter two reach awk through the environment
# instead of being pasted into the program text, so characters such as
# < > & " or a backslash in a path can neither corrupt the page nor the awk
# program.
# Globals:
#   CUT_AT, minK, LINK_FILES, version, eSPACE, TMP, PWD, HOSTNAME (read)
# Arguments:
#   $1 - file with lines "<parent size> <parent path> <size>\t<path>",
#        grouped by parent
# Outputs:
#   the HTML report on stdout
#######################################
render_html() {
  LANG= GT5_PWD="$PWD" GT5_HOST="$HOSTNAME" GT5_CUTFILE="$TMP/du-pf-cut" gawk \
    -v cut_at="$CUT_AT" -v min_kb="$minK" \
    -v link_files="$LINK_FILES" -v gt5_version="$version" '
    # Human readable size: input in KB, output scaled to K, M, G or T.
    function i2h(i,    unit) {
      unit = "K"; i += 0   # force i to be a number
      if (i > 999) { i /= 1024; unit = "M" }
      if (i > 999) { i /= 1024; unit = "G" }
      if (i > 999) { i /= 1024; unit = "T" }
      if (i < 10) return sprintf("%.1f%s", i, unit)
      return sprintf("%d%s", i, unit)
    }
    # Escape the characters that are special in HTML text and attributes.
    function esc(s) {
      gsub(/&/, "\\&amp;", s)
      gsub(/</, "\\&lt;", s)
      gsub(/>/, "\\&gt;", s)
      gsub(/"/, "\\&quot;", s)
      return s
    }
    # Heading of one directory section; c is the number of entries that were
    # hidden in it. Printed after the entries because of the final reversal.
    function directory(p, ps, c,    note) {
      allcut += c
      if (!p) return
      print ""
      note = c ? sprintf(", %i lines cut", c) : ""
      print esc(p) "/:   [" i2h(ps) "B in " children[p] " files or directories" note "]"
      print "<A NAME=\"" esc(p) "\"><HR></A>"
    }
    BEGIN {
      print "</PRE></HTML>"
      # Working directory without a trailing slash, used as link prefix.
      base = ENVIRON["GT5_PWD"]
      sub(/\/$/, "", base)
    }
    {
      # A different parent starts a new section: close the previous one.
      if ((ops "-" op) != ($1 "-" $2)) { directory(op, ops, cut); cut = 0 }
      # s: last path component of the entry
      e = 0
      while ((j = index(substr($4, e + 1), "/")) > 0) e += j
      s = esc(substr($4, e + 1))
      children[$2]++
      if (children[$4])
        # already seen as a parent: link to its own section
        s = "<A HREF=\"#" esc($4) "\">" s "</A>/"
      else if (link_files)
        s = "<A HREF=\"" esc(base substr($4, 2)) "\"><FONT COLOR=GREEN>" s "</FONT></A>"
      # share of the parent in percent
      pz = 100 * $3 / $1
      if (pz > cut_at + 0) printf "%6s [%4.1f%%] ./%s\n", i2h($3), pz, s
      else cut++
      op = $2; ops = $1
    }
    END {
      directory(op, ops, cut)
      if (min_kb + 0 > 0) {
        cutfile = ENVIRON["GT5_CUTFILE"]
        getline minkbcut < cutfile
        if (minkbcut) {
          printf " [%i files/directories with", minkbcut
          print  " less than " min_kb "KB ignored]"
        }
      }
      summary = ""
      if (allcut) summary = sprintf(" [%i lines (%%<%s) cut]", allcut, cut_at)
      print "gt5 v" gt5_version " (" esc(ENVIRON["GT5_HOST"]) ":" esc(ENVIRON["GT5_PWD"]) "):" summary
      print "<HTML><PRE>"
    }
  ' "$1" | sed 's,'"$eSPACE"', ,g' | tac
}

SECONDS=0
echo -n "HTMLing data:       "
render_html "$TMP/du-pf-ss-pp-oo" > "$DATA"
SUM=$((SUM + SECONDS))
printf "%4s\n" "${SECONDS}s"

printf 'job done in %s seconds.\n' "$SUM"

# Hand over the report: save it where --keep-as asked for, otherwise show it
# in "links" if that browser was found, otherwise tell the user where the
# file is and wait for ENTER before it gets cleaned up.
if [ -f "$SAVE" ]; then
  cat "$DATA" > "$SAVE" || exit
  echo -e "\ndata successfully saved as \"$SAVE\"."
elif [ "$LINKS" ]; then
  # the exit status of the browser is of no interest
  "$LINKS" "$DATA"
else
  echo -e "\nOk. Go, open $DATA with your favorite"
  echo "browser. If you have finished return to this window and press ENTER."
  echo "You may want to install a textbrowser: Try \"${0##*/} --get-links\"."
  read
fi

# Remove the scratch directory; with --debug (or if the removal fails) say
# where the intermediate files are instead.
if [ -n "$DEBUG" ] || ! rm -rf "$TMP"; then
  echo "data stored in $TMP"
fi

