Bash 4chan
Ахтунг! Это даже не бета, даже не beta than nothing! Это, скорее, alpha than nothing!
Итак, обещанные кому-то скрипты:
#!/usr/bin/env bash
# chanlist.sh - list the boards linked from an imageboard's front page.
#
# Usage: ./chanlist.sh chan.org [board]
#   chan.org  URL of the site; it is handed to wget as-is and is also used
#             as the prefix for relative board links
#   board     optional board name; when given, only the URL field of the
#             entries whose line contains "/board/" is printed
#
# Without [board] every entry is printed as "name | description | url".
# External tools: wget, grep, iconv, enca (charset detection).
set -u

usage() {
  printf 'Usage: %s chan.org [board]\n' "${0##*/}" >&2
}

# Join base URL $1 and path $2 with exactly one "/" between them.
join_url() {
  printf '%s/%s\n' "${1%/}" "${2#/}"
}

# Print the first frame page named in the frameset document stored in file $1.
# "head -n 1" matters: grep -o can report several names from one line and
# only one of them may be appended to the site URL.
frame_src() {
  grep -- 'frame src=' "$1" \
    | grep -E -o -m 1 '[a-z_0-9/]*[a-z_0-9]+\.(html|htm|php|xhtml|xml)' \
    | head -n 1
}

# Write file $1 to stdout, converted by iconv from the charset that enca
# reports when iconv lists that charset, and unchanged otherwise.
to_local_charset() {
  local file=$1 charset
  charset=$(enca -i <"$file")
  # An empty name must not reach grep: an empty pattern matches every line,
  # which would select the iconv branch without a source charset.
  if [[ -n "$charset" ]] && iconv -l | grep -q -F -- "$charset"; then
    iconv -c -f "$charset" <"$file"
  else
    cat -- "$file"
  fi
}

# Read HTML on stdin and print each <a ...>text</a> element on its own line.
# The pattern in the published listing was split across two lines with
# unbalanced parentheses (its quantifiers were apparently lost), so this is
# a reconstruction: an anchor that starts with a quoted attribute and whose
# content is plain text.
extract_anchors() {
  grep -E -o '<a [a-z]+="[^<>]+">[^<>]+</a>'
}

# Print "name | description | url" for the anchor element $2, resolving a
# relative href against the site URL $1. Prints nothing when the anchor
# contains no "/name/" path segment. When a pattern matches more than once
# (e.g. a link to /b/ labelled "/b/ - Random") the first match is used.
format_board() {
  local site=$1 anchor=$2 name desc href
  name=$(grep -E -o '/[a-z0-9]+/' <<<"$anchor" | head -n 1)
  [[ -n "$name" ]] || return 0
  desc=$(grep -E -o '>[^<>]+<' <<<"$anchor" | head -n 1)
  desc=${desc#'>'}
  desc=${desc%'<'}
  href=$(grep -E -o 'href="(https?://[a-z0-9.]+/)?[a-z0-9/._]+"' <<<"$anchor" \
    | head -n 1)
  href=${href#'href="'}
  href=${href%'"'}
  if [[ "$href" != http://* && "$href" != https://* ]]; then
    href=$(join_url "$site" "$href")
  fi
  printf '%s | %s | %s\n' "$name" "$desc" "$href"
}

# Print one formatted entry per board link found in the HTML file $2 that
# was downloaded from site $1.
list_boards() {
  local anchor
  to_local_charset "$2" | extract_anchors | while IFS= read -r anchor; do
    format_board "$1" "$anchor"
  done
}

main() {
  if (( $# < 1 )); then
    usage
    return 2
  fi
  local site=$1 board=${2:-} page frame

  # A private directory removed on exit replaces "rm /tmp/chanlist*", which
  # also deleted files of concurrent runs and missed files under $TMPDIR.
  chanlist_tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/chanlist.XXXXXX") || {
    printf '%s: mktemp failed\n' "${0##*/}" >&2
    return 1
  }
  trap 'rm -rf -- "${chanlist_tmpdir:?}"' EXIT

  page=$chanlist_tmpdir/index
  wget -q -O "$page" -- "$site" || {
    printf '%s: cannot fetch %s\n' "${0##*/}" "$site" >&2
    return 1
  }

  # A frameset front page carries no board links itself; fetch the first
  # frame page into a separate file so the input is never overwritten
  # while it is still being read.
  if grep -q frameset -- "$page"; then
    frame=$(frame_src "$page")
    if [[ -n "$frame" ]]; then
      wget -q -O "$chanlist_tmpdir/frame" -- "$(join_url "$site" "$frame")" || {
        printf '%s: cannot fetch frame %s\n' "${0##*/}" "$frame" >&2
        return 1
      }
      page=$chanlist_tmpdir/frame
    fi
  fi

  if [[ -z "$board" ]]; then
    list_boards "$site" "$page"
  else
    # Field 3 is printed as cut leaves it (including the space that follows
    # the "|" separator), exactly as the script always did.
    list_boards "$site" "$page" \
      | grep -F -- "/$board/" | cut -d '|' -f 3 | tr -d '\t'
  fi
}

main "$@"
#!/usr/bin/env bash
# boadget.sh - print the URL of every thread linked from the numbered index
# pages of a board.
#
# Usage: ./boadget.sh http://chan.org/board/
#
# The page at the given URL is searched for links to "N.html" index pages
# (one or two digits). Each of those pages is downloaded and every link that
# ends in at least three digits plus ".html" is printed once, prefixed with
# the board URL. As before, the page at the given URL itself is only used to
# discover the index pages, not searched for threads.
# External tools: wget, grep, tr, awk.
set -u

usage() {
  printf 'Usage: %s http://chan.org/board/\n' "${0##*/}" >&2
}

# Print the part of URL $1 up to and including its last "/".
board_base() {
  grep -E -o 'https?://[a-z0-9./]+/' <<<"$1" | head -n 1
}

# Read HTML on stdin; print each distinct "N.html" page name once, in order
# of first appearance. A plain "uniq" is not enough here because pager links
# are usually repeated far apart (top and bottom of the page).
page_names() {
  grep -E -o '"[0-9]{1,2}\.html' | tr -d '"' | awk '!seen[$0]++'
}

# Read HTML on stdin; print every thread link path (".../NNN.html").
thread_paths() {
  grep -E -o '[a-z0-9/]*[0-9]{3,}\.html'
}

main() {
  if (( $# < 1 )); then
    usage
    return 2
  fi
  local board_url=$1 site index page html path

  site=$(board_base "$board_url")
  if [[ -z "$site" ]]; then
    printf '%s: no http(s)://host/ prefix in %s\n' "${0##*/}" "$board_url" >&2
    return 1
  fi

  # "-O -" (ASCII hyphen) sends the document to stdout; the typographic dash
  # in the published listing made wget write to a file named after the dash.
  index=$(wget -q -O - -- "$board_url") || {
    printf '%s: cannot fetch %s\n' "${0##*/}" "$board_url" >&2
    return 1
  }

  page_names <<<"$index" | while IFS= read -r page; do
    html=$(wget -q -O - -- "${site}${page}") || {
      printf '%s: cannot fetch %s%s, skipping\n' "${0##*/}" "$site" "$page" >&2
      continue
    }
    thread_paths <<<"$html" | while IFS= read -r path; do
      printf '%s%s\n' "$site" "$path"
    done
  done | awk '!seen[$0]++' # order-preserving de-duplication of thread URLs
}

main "$@"
#!/usr/bin/env bash
# threadget.sh - download every image linked from a thread page into the
# current directory.
#
# Usage: ./threadget.sh http://chan.org/board/res/12345.html
#
# Links of the form <a href="....(jpg|jpeg|gif|png)"> are collected, each
# distinct link is fetched once with "wget -c" (resume/skip existing).
# Exit status is non-zero when the page or any image could not be fetched.
# External tools: wget, grep, sed, awk.
set -u

usage() {
  printf 'Usage: %s http://chan.org/board/res/12345.html\n' "${0##*/}" >&2
}

# Read HTML on stdin; print each distinct image link once, in order of first
# appearance. The published listing filtered the matches a second time with
# a ".jpg"-only pattern, which silently dropped every gif/png/jpeg link.
image_links() {
  grep -E -o '<a href="[a-z0-9./:]+\.(jpg|jpeg|gif|png)"' \
    | sed -e 's/^<a href="//' -e 's/"$//' \
    | awk '!seen[$0]++'
}

# Print link $2 as an absolute URL, using the page URL $1 as reference:
# links with a scheme are kept, "/path" links get the page's scheme and
# host, anything else is taken relative to the page's directory.
resolve_url() {
  local page=$1 link=$2 origin
  case "$link" in
    *://*)
      printf '%s\n' "$link"
      ;;
    /*)
      origin=$(grep -E -o '^[a-z]+://[^/]+' <<<"$page")
      printf '%s%s\n' "$origin" "$link"
      ;;
    *)
      printf '%s/%s\n' "${page%/*}" "$link"
      ;;
  esac
}

main() {
  if (( $# < 1 )); then
    usage
    return 2
  fi
  local page_url=$1 html link target status=0

  html=$(wget -q -O - -- "$page_url") || {
    printf '%s: cannot fetch %s\n' "${0##*/}" "$page_url" >&2
    return 1
  }

  # Process substitution keeps the loop in the current shell so that
  # $status survives it.
  while IFS= read -r link; do
    target=$(resolve_url "$page_url" "$link")
    if ! wget -q -c -- "$target"; then
      printf '%s: download failed: %s\n' "${0##*/}" "$target" >&2
      status=1
    fi
  done < <(image_links <<<"$html")

  return "$status"
}

main "$@"