From f627125c0fa9fdcc4a0650ab3626330a16e992a8 Mon Sep 17 00:00:00 2001
From: Alexander Sulfrian
Date: Thu, 10 May 2012 02:13:27 +0000
Subject: bin/comics: add script for updating xkcd, add $state_file config

xkcd is special (read: other than all others scripts) because it does not
contain a timestamp information in the comics. we have to track the last
script that was added to our mirror. so the xkcd script needs a state-file
containing the last xkcd that was mirrored. the date of the xkcd is the day of
the first discovery of a new comic.
---
Review note (not part of the commit message): this patch was restored from a
copy whose line breaks and indentation had been collapsed. Indentation is
reconstructed as four spaces; confirm against the repository before applying.
The "<img[^>" prefix of the img_tag sed expression had been stripped as if it
were an HTML tag and is reconstructed here; the author address is missing from
the From: header, presumably for the same reason. Verify both against commit
f627125.

 bin/comics/xkcd | 45 +++++++++++++++++++++++++++++++++++++++++++++
 etc/settings.sh | 10 +++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100755 bin/comics/xkcd

diff --git a/bin/comics/xkcd b/bin/comics/xkcd
new file mode 100755
index 0000000..5af680b
--- /dev/null
+++ b/bin/comics/xkcd
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+source $(pwd)/$(dirname $0)/../../etc/settings.sh
+tmp=$(mktemp -d)
+cd "${tmp}"
+
+browser="Mozilla/4.76 [de] (X11; U; Linux 2.2.18 i586)"
+newn="${day}.png"
+
+wget ${wget_args} -U "$browser" http://www.xkcd.com -O index.html
+
+url=$(grep -A 1 "Image URL" index.html | sed 'N;s/.*\(http.*png\)<\?.*/\1/')
+imgname=$(echo "$url" | tr '/' '\n' | tail -1)
+
+# get additional information
+img_tag=$(grep -A2 "src=\"${url}" index.html | sed 'N;N;s/.*\(<img[^>]\+>\).*/\1/')
+title=$(sed 's/.*title="\([^"]*\)".*/\1/'<<< $img_tag)
+alt=$(sed 's/.*alt="\([^"]*\)".*/\1/'<<< $img_tag)
+
+if [ ! -s "${image_dir}/$newn" ]; then
+    # no image availalable for current date
+
+    if ! grep -q "${url}" "${state_file}" >/dev/null; then
+        # new image
+        echo $url > "${state_file}"
+
+        wget ${wget_args} -U "$browser" -O "${tmp}/${newn}" \
+            --header="Referer: http://www.xkcd.com/" "$url"
+
+        if [ -s "${tmp}/${newn}" ]; then
+            # save
+            mv "${tmp}/${newn}" "${image_dir}/$newn"
+            echo "$title" > "${image_dir}/${day}.title"
+            echo "$alt" > "${image_dir}/${day}.alt"
+
+            # update symlinks
+            rm -f "${comic_dir}/latest.png" "${comic_dir}/latest.title" "${comic_dir}/latest.alt"
+            ln -s "${image_offset}/$newn" "${comic_dir}/latest.png"
+            ln -s "${image_offset}/${day}.title" "${comic_dir}/latest.title"
+            ln -s "${image_offset}/${day}.alt" "${comic_dir}/latest.alt"
+        fi
+    fi
+fi
+
+rm -rf ${tmp}
diff --git a/etc/settings.sh b/etc/settings.sh
index d1f5486..d837e7d 100644
--- a/etc/settings.sh
+++ b/etc/settings.sh
@@ -11,7 +11,11 @@ comic=$(basename $0)
 year=$(date -d"$daysago days ago" +%Y)
 month=$(date -d"$daysago days ago" +%Y-%m)
 day=$(date -d"$daysago days ago" +%d)
-comic_dir=$(pwd)/$(dirname $0)/../../htdocs/imgs/${comic}
+
+top="$(pwd)/$(dirname $0)/../../"
+state_dir=${top}/var/state/
+state_file=${state_dir}/${comic}.state
+comic_dir=${top}/htdocs/imgs/${comic}
 image_offset=${month}/
 image_dir=${comic_dir}/${image_offset}
 
@@ -19,4 +23,8 @@ if [ ! -d "${image_dir}" ]; then
     mkdir -p "${image_dir}"
 fi
 
+if [ ! -d "${state_dir}" ]; then
+    mkdir -p "${state_dir}"
+fi
+
 wget_args="--timeout 100 --wait=1000 --no-cache"
-- 
cgit v1.2.3-1-g7c22