<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Biodegradable Geek &#187; Code</title>
	<atom:link href="http://biodegradablegeek.com/category/coding/feed/" rel="self" type="application/rss+xml" />
	<link>http://biodegradablegeek.com</link>
	<description></description>
	<lastBuildDate>Tue, 22 Jun 2010 21:52:41 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=</generator>
		<item>
		<title>Calculate Your GPA Using this Bash Script</title>
		<link>http://biodegradablegeek.com/2009/05/calculate-your-gpa-using-this-bash-script/</link>
		<comments>http://biodegradablegeek.com/2009/05/calculate-your-gpa-using-this-bash-script/#comments</comments>
		<pubDate>Thu, 21 May 2009 10:18:18 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Code]]></category>
		<category><![CDATA[Snippets]]></category>
		<category><![CDATA[bash]]></category>
		<category><![CDATA[code]]></category>
		<category><![CDATA[Code example]]></category>
		<category><![CDATA[scripting]]></category>
		<category><![CDATA[Scripts]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=433</guid>
		<description><![CDATA[This Bash script is used to calculate your Grade Point Average (GPA) on the command line. Usage might not be intuitive. Please see the usage function or just run the script without passing it any arguments. The gval function should be edited to reflect your own region or university. It has been written and tested [...]]]></description>
			<content:encoded><![CDATA[<p>This Bash script is used to calculate your Grade Point Average (GPA) on the command line. Usage might not be intuitive. Please see the usage function or just run the script without passing it any arguments.</p>
<p>The <strong>gval</strong> function should be edited to reflect your own region or university. It has been written and tested on Bash 3.2.48.</p>
<pre class="brush: bash; title: ; notranslate">
#!/bin/bash
#
# Bash GPA calculator
#
# Needs bash rather than plain sh: the script relies on arrays, let,
# the function keyword, (( )) loops and echo -e.
#
# Isam | r0cketjump@yahoo.com | biodegradablegeek.com
# 05/21/2009 - Just another 4 AM project

function usage {
  echo -e &quot;\nBASH GPA Calculator&quot;
  echo
  echo -e &quot;\tAccepts an even # of arguments in the form of C G C G C G ...&quot;
  echo -e &quot;\t (C = number of credits, G = grade for the course)&quot;
  echo
  echo -e &quot;\tExample: You got a B+ in a 4 credit course, &quot;
  echo -e &quot;\t         an A in a 3 credit course, etc..&quot;
  echo
  echo -e &quot;\tUSAGE: $0 4 B+ 3 A 3 F 3 B-&quot;
  echo
  echo &quot;Acceptable grades are A B C D F WU (eq to F)&quot;
  echo
}

function calc {
  echo `echo &quot;scale=3; $1&quot; | bc`
}

function gval {
  grade=`echo &quot;$1&quot; | tr [a-z] [A-Z]`
  case $grade in
    A+ ) echo '4.3';;
    A ) echo '4';;
    A- ) echo '3.7';;

    B+ ) echo '3.3';;
    B ) echo '3.00';;
    B- ) echo '2.7';;

    C+ ) echo '2.3';;
    C ) echo '2.0';;
    C- ) echo '1.7';;

    D+ ) echo '1.3';;
    D ) echo '1.0';;
    D- ) echo '0.7';;

    F ) echo '0';;
    WF ) echo '0';;
    WU ) echo '0';;
  esac
}

# check # of arguments. is it even?
let MOD=$#%2
if [ ! $MOD -eq 0 ]; then
  usage
  exit
elif [ $# -eq 0 ]; then
  usage
  exit
fi

args=($@)
n=${#args[@]}

points=0
credits=0

for ((i=0;i&lt;$n-1;i+=2)); do
  k=${i}

  creds=${args[$k]}
  cgrade=${args[$k+1]}

  # convert cgrade (C-) to a number
  grade=`gval $cgrade`
  pts=`calc $grade*$creds`

  echo &quot;$creds * $cgrade ($grade) = $pts&quot;

  points=`calc $points+$pts`
  credits=`calc $credits+$creds`
done

gpa=`calc $points/$credits`
echo &quot;------------&quot;
echo &quot;Total points  = $points&quot;
echo &quot;Total credits = $credits&quot;
echo &quot;------------&quot;
echo &quot;** GPA (pts/crd) = $gpa&quot;
echo &quot;------------&quot;
</pre>
<p>(Script uses <strong>bc</strong> as the calculator. Change that in the calc function if you need to.)</p>
<p>I&#8217;ll never get used to Bash&#8217;s ugly ass syntax.  &#8230; esac?</p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2009/05/calculate-your-gpa-using-this-bash-script/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>How to Block AIM&#8217;s Annoying &#8216;AOL System Msg&#8217; in Pidgin</title>
		<link>http://biodegradablegeek.com/2009/05/how-to-block-aims-annoying-aol-system-msg-in-pidgin/</link>
		<comments>http://biodegradablegeek.com/2009/05/how-to-block-aims-annoying-aol-system-msg-in-pidgin/#comments</comments>
		<pubDate>Sat, 02 May 2009 04:44:12 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Code]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Productivity]]></category>
		<category><![CDATA[Tools]]></category>
		<category><![CDATA[Workarounds]]></category>
		<category><![CDATA[annoying]]></category>
		<category><![CDATA[chat]]></category>
		<category><![CDATA[pidgin]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=426</guid>
		<description><![CDATA[The following plugin for Pidgin will block the incredibly annoying and useless notifications from AOLSystemMsg on AIM. &#8220;AOL System Msg: Your screen name (mrEman) is now signed into AOL(R) Instant Messenger (TM) in 2 locations. Click here for more information.&#8221; To use, paste code in file, save file as blockaolsystemmsg.pl in ~/.purple/plugins/ and then open [...]]]></description>
			<content:encoded><![CDATA[<p>The following plugin for <a href="http://www.pidgin.im/">Pidgin</a> will block the incredibly annoying and useless notifications from AOLSystemMsg on AIM.</p>
<blockquote><p>&#8220;AOL System Msg: Your screen name (mrEman) is now signed into AOL(R) Instant Messenger (TM) in 2 locations. Click here for more information.&#8221;</p></blockquote>
<p>To use, paste code in file, save file as <strong>blockaolsystemmsg.pl</strong> in <strong>~/.purple/plugins/</strong> and then open (or re-open) Pidgin and go to Tools -&gt; Plugins (or press CTRL+U), and enable &#8220;Block AOLSystemMsg.&#8221; That should be it!</p>
<p><em>If you&#8217;re having any trouble, try going to Help -&gt; Debug to open up Pidgin&#8217;s debug console. </em></p>
<pre class="brush: perl; title: ; notranslate">
#!/usr/bin/perl
# BlockAOLSystemMsg plugin tested on Pidgin 2.5.5. Put in ~/.purple/plugins/ and enable
use Purple;
our $target = 'AOL System Msg'; # case-insensitive
our $plugin_name = 'Block AOLSystemMsg'; 

%PLUGIN_INFO = (
  perl_api_version =&gt; 2,
  name =&gt; $plugin_name,
  version =&gt; &quot;0.1&quot;,
  summary =&gt; &quot;Blocks the screen name 'AOL System Msg'&quot;,
  description =&gt; &quot;Ignore annoying 'your SN has signed on at 2 locations' AIM message&quot;,
  author =&gt; &quot;Isam &quot;,
  url =&gt; &quot;http://biodegradablegeek.com&quot;,
  load =&gt; &quot;plugin_load&quot;,
  unload =&gt; &quot;plugin_unload&quot;
);

sub loginfo { Purple::Debug::info($plugin_name, &quot; @_\n&quot;); }
sub minimize {
  my $r = lc($_[0]);
  $r =~ s/ //g;
  return $r;
}

sub plugin_init { return %PLUGIN_INFO; }

sub plugin_load {
  my $plugin = shift;
  $target = minimize($target);
  loginfo(&quot;Sight set on '$target'&quot;);
  Purple::Signal::connect(Purple::Conversations::get_handle(),
                          'receiving-im-msg', $plugin, \&amp;callback, '');
}

sub plugin_unload {
  my $plugin = shift;
  loginfo('Block AOLSystemMsg Unloaded.');
}

# Handler connected to the 'receiving-im-msg' signal in plugin_load.
# Receives (account, sender, message, flags); only sender and message are used.
# When the normalized sender equals $target, the message is logged and 1 is returned.
# NOTE(review): presumably a true return value tells Pidgin to drop the
# message -- confirm against the libpurple signal documentation.
sub callback {
  my ($acc, $sender, $msg, $flags) = @_;
  # minimize() lowercases and strips spaces, matching how $target was stored
  if (minimize($sender) eq $target) {
    loginfo(&quot;(BLOCKED) &lt;$sender&gt; $msg&quot;);
    return 1
  };
}
</pre>
<p>update: Fixed the botched code. Thanks.</p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2009/05/how-to-block-aims-annoying-aol-system-msg-in-pidgin/feed/</wfw:commentRss>
		<slash:comments>11</slash:comments>
		</item>
		<item>
		<title>How to Maintain Static Sites with Git &amp; Jekyll</title>
		<link>http://biodegradablegeek.com/2009/03/how-to-maintain-static-sites-with-git-jekyll/</link>
		<comments>http://biodegradablegeek.com/2009/03/how-to-maintain-static-sites-with-git-jekyll/#comments</comments>
		<pubDate>Wed, 01 Apr 2009 04:10:00 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Automation]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Productivity]]></category>
		<category><![CDATA[Ruby]]></category>
		<category><![CDATA[Snippets]]></category>
		<category><![CDATA[Tips]]></category>
		<category><![CDATA[Tools]]></category>
		<category><![CDATA[bash]]></category>
		<category><![CDATA[code]]></category>
		<category><![CDATA[scripting]]></category>
		<category><![CDATA[web development]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=371</guid>
		<description><![CDATA[Static sites in this context just means non-database driven sites. Your static site can be an elaborate PHP script or just a few markup and image files. For this I am using Jekyll &#8211; A neat Ruby gem that makes your static sites dynamic. It lets you create layouts and embed custom variables in your [...]]]></description>
			<content:encoded><![CDATA[<p>Static sites in this context just means non-database driven sites. Your static site can be an elaborate PHP script or just a few markup and image files. For this I am using <strong><a href="http://github.com/mojombo/jekyll/tree/master">Jekyll</a> &#8211; A neat Ruby gem that makes your static sites dynamic.</strong> It lets you create layouts and embed custom variables in your HTML (this is a &#8220;prototype&#8221; of the site). </p>
<p>Jekyll tackles all the nuisances involved in creating static pages (I used to add just enough PHP to make a layout). It works by running your prototype through some parsers and outputs plain static HTML/XML (RSS feeds) etc. It&#8217;s perfect for lightweight sites that would be impractical on WordPress, like a few static pages of information, landing pages, portfolio/resume pages, and parked domains. </p>
<p>Git takes care of keeping your development (local) and production (remote) environments synced. Git might be a little confusing if you&#8217;re learning it with the mindset that it works like Subversion. </p>
<p><strong>I&#8217;ll update this post when the guide is done. For now, the following will assume you&#8217;re familiar with Jekyll (or at least have an empty file in the prototype directory) and git. This Bash script simplifies creating the remote git repository:</strong></p>
<p>** please read through the code and make sure you know what this does, and what you&#8217;re doing. As of now, this is biased towards my own Apache/vhost setup. It&#8217;s trivial to edit for your specific needs. <strong>You&#8217;re using this at your own risk</strong>.</p>
<p>(<a href="http://code.biodegradablegeek.com/repogen.sh" target="_blank">direct link &#8211; repogen.sh</a>)</p>
<pre lang="bash">
#!/bin/sh
#
# 04/01/2009 | http://biodegradablegeek.com | GPL
#
# You should be in site (NOT public) root (be in same dir as public/ log/ etc)
# proto/ is created and will house the jekyll prototype
# public/ will be the generated static site
# the public/ folder will be REMOVED and regenerated on every push
# 

# The site's domain is a required argument; without it, show usage and
# stop with a non-zero status so callers can detect the error.
if [ -z "$1" ]; then
  echo "Usage: ./repogen.sh domain.com"
  exit 1
fi

# optional. will make it easier to copy/paste cmd to clone repo
SSHURL="ssh.domain.com"
URL="$1"

echo "** creating tmp repo"
mkdir proto
cd proto
git init
touch INITIAL
git add INITIAL
git commit -a -m "Initial Commit"

echo "** creating bare repo"
cd ..
git clone --bare proto proto.git
mv proto proto.old
git clone proto.git
rm -rf proto.old

echo "** generating hook"
HOOK=proto.git/hooks/post-update

# Move any pre-existing hook out of the way. The file may not exist
# (e.g. when git only installs post-update.sample), so check first.
if [ -e "$HOOK" ]; then
  mv "$HOOK" /tmp
fi

# The generated hook calls pushd/popd, which are bash builtins, so it has
# to run under bash rather than plain sh.
echo '#!/bin/bash' >> $HOOK
echo '# To enable this hook, make this file executable by "chmod +x post-update".' >> $HOOK
echo '#exec git-update-server-info' >> $HOOK
echo '' >> $HOOK
echo '' >> $HOOK
echo 'URL='"$URL" >> $HOOK
echo 'PROTO="/home/$USER/www/$URL/proto"' >> $HOOK
echo 'PUBLIC="/home/$USER/www/$URL/public"' >> $HOOK
echo  '' >> $HOOK
echo 'export GIT_DIR="$PROTO/.git"' >> $HOOK
echo 'pushd $PROTO > /dev/null' >> $HOOK
echo 'git pull' >> $HOOK
echo 'popd > /dev/null' >> $HOOK
echo '' >> $HOOK
echo "echo -----------------------------" >> $HOOK
echo "echo '** Pushing changes to '$URL" >> $HOOK
echo "echo '** Moving current public to /tmp'" >> $HOOK
# Single quotes are closed around $URL so it is expanded now, while the
# backquoted date command is written literally and runs on every push.
echo 'mv "$PUBLIC" "/tmp/'$URL'public-`date '+%m%d%Y'`"' >> $HOOK
echo 'echo "** Generating new public"' >> $HOOK
echo 'jekyll "$PROTO" "$PUBLIC"' >> $HOOK

echo "** enabling hook"
chmod a+x $HOOK

# $SSHURL is only the SSH host to connect to; the repository itself lives
# under www/$URL (the site's domain passed on the command line).
echo "** clone repo on local machina. example:"
echo "git clone ssh://$USER@$SSHURL/~$USER/www/$URL/proto.git"
</pre>
<p><strong>Usage</strong></p>
<p>Your site structure might be different. <strong>repogen.sh</strong> is made by pasting the above code in a new file, and then chmod a+x to make it executable. This should be done on the remote server.</p>
<pre lang="bash">
cd www/domain.com/

ls
public/ private/ log/ cgi-bin/

./repogen.sh domain.com
</pre>
<p>Now on your local machine, clone the new repo, move your files in, and push:</p>
<pre lang="bash">
git clone ssh://[username]@ssh.domain.com/~[username]/www/domain.com/proto.git
cd proto/
# echo writes the text itself; cat would look for a file with that name
echo "hello, world" > index.htm
git add index.htm
git commit -a -m 'first local commit'
git push
</pre>
<p>After you push your changes, the post-update hook will delete the public/ directory (the root of the site). This dir and its contents are automatically generated and will get wiped out on EVERY push. Keep this in mind. All your changes and content should reside in proto/. </p>
<p>The proto/ repo will pull in the new changes, and then Jekyll will be invoked to generate the updated site in public/ from the prototype.</p>
<p>Should you need to edit it, the <strong>post-update hook</strong> is in the bare git repo (proto.git/hooks/)</p>
<p>Thanks to the authors in the posts below for sharing ideas. I first read this git method on dmiessler&#8217;s site. </p>
<p><strong>Resources:</strong><br />
<a href="http://dmiessler.com/blog/using-git-to-maintain-your-website">dmiessler.com &#8211; using git to maintain static pages</a><br />
<a href="http://toroid.org/ams/git-website-howto">toroid.org &#8211; using git to manage a web site</a><br />
<a href="http://github.com/mojombo/jekyll/tree/master">Jekyll @ GitHub</a><br />
<a href="http://media.pragprog.com/titles/tsgit/chap-005-extract.html">git info</a><br />
<a href="http://www.nardol.org/2009/2/19/git-basics-reversing-the-git-sucks-effect">more git info</a></p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2009/03/how-to-maintain-static-sites-with-git-jekyll/feed/</wfw:commentRss>
		<slash:comments>4</slash:comments>
		</item>
		<item>
		<title>Scraping Google Trends with Mechanize and Hpricot</title>
		<link>http://biodegradablegeek.com/2009/01/scraping-google-trends-with-mechanize-and-hpricot/</link>
		<comments>http://biodegradablegeek.com/2009/01/scraping-google-trends-with-mechanize-and-hpricot/#comments</comments>
		<pubDate>Sat, 24 Jan 2009 06:53:06 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Automation]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Ruby]]></category>
		<category><![CDATA[Scraping]]></category>
		<category><![CDATA[Scripts]]></category>
		<category><![CDATA[Code example]]></category>
		<category><![CDATA[making monies]]></category>
		<category><![CDATA[programming]]></category>
		<category><![CDATA[public domain]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=313</guid>
		<description><![CDATA[This is a small Ruby script that fetches the 100 trends of the day for a specific date. If multiple dates are searched, one can find out how many times a keyword occurred between two dates, or just find out what keywords are constantly appearing on the top 100 list. Very profitable info! but alas, [...]]]></description>
			<content:encoded><![CDATA[<p>This is a small Ruby script that fetches the 100 trends of the day for a specific date. If multiple dates are searched, one can find out how many times a keyword occurred between two dates, or just find out what keywords are constantly appearing on the top 100 list. <strong>Very profitable info!</strong> but alas, the script is incomplete and one must implement the &#8220;implement me!&#8221; methods to get full functionality. This, in its current state, should serve as a good starting point for scraping Google Trends.</p>
<p>On a technical note, it&#8217;s using mechanize, hpricot, tempfile (for the cache). A lot of this is just <a href="http://en.wikipedia.org/wiki/Copy_and_paste_programming">copy &amp; paste programming</a> from the <a href="http://biodegradablegeek.com/2009/01/animecrazy-scraper-example-using-hpricot-mechanize/">earlier anime scraper</a>. </p>
<p>To grab the gems <em>(rdoc takes 10x as long as the gem to fetch and install)</em>:</p>
<pre lang="bash">
sudo gem install mechanize --no-rdoc
sudo gem install hpricot --no-rdoc
</pre>
<pre lang="ruby">
#!/usr/bin/env ruby
# biodegradablegeek.com
# public domain
# 

require 'rubygems'
require 'hpricot'
require 'tempfile'
require 'mechanize'
#require 'highline/import'
#HighLine.track_eof = false

$mech = WWW::Mechanize.new
$mech.user_agent_alias = 'Mac Safari'
$master = []

def puts2(txt=''); puts "*** #{txt}"; end

class Cache
  def initialize
    # Setup physical cache location
    @path = 'cache'
    Dir.mkdir @path unless File.exists? @path

    # key/val = url/filename (of fetched data)
    @datafile = "#{@path}/cache.data"
    @cache = load @datafile
  end

  def put key, val
    tf = Tempfile.new('googletrends', @path)
    path = tf.path
    tf.close! # important!

    puts2 "Saving to cache (#{path})"
    open(path, 'w') { |f|
      f.write(val)
      @cache[key] = path
    }

    save @datafile
  end

  def get key
    return nil unless exists?(key) &#038;&#038; File.exists?(@cache[key])
    open(@cache[key], 'r') { |f| f.read }
  end

  def files
    @cache.values
  end

  def first
    @cache.first
  end

  def exists? key
    @cache.has_key? key
  end

private
  # Load saved cache
  def load file
    return File.exists?(file) ? YAML.load(open(file).read) : {}
  end

  # Save cache
  def save path
    open(path, 'w') { |f|
      f.write @cache.to_yaml
    }
  end
end

$cache = Cache.new

def fetch(url)
  body = $mech.get(url).body()
  $cache.put(url, body)
  body
end

def getPage(url)
  body = $cache.get(url) 

  if body.nil?
    puts "Not cached. Fetching from site..."
    body = fetch url
  end
  body
end

# Restores the keyword list written by saveState into $master.
# Falls back to an empty Array -- the type the rest of the script expects
# ($master << keyword, $master.find, $master.first.name) -- when the state
# file is missing or empty.
def loadState
  mf = 'cache/master.data'
  # NOTE(review): YAML.load can build arbitrary objects; only load state
  # files this script wrote itself.
  # File.read closes the handle, unlike open(mf).read.
  saved = File.exist?(mf) ? YAML.load(File.read(mf)) : nil
  # An empty file parses to false/nil depending on the YAML version.
  $master = saved || []
end

def saveState
  open('cache/master.data', 'w+') { |f|
    f.write $master.to_yaml
  }
end

def main
  #loadState

  # Grab top 100 Google Trends (today)
  #date = Time.now.strftime '%Y-%m-%d'
  date = '2009-01-21'

  puts2 "Getting Google's top 100 search trends for #{date}"
  url = "http://www.google.com/trends/hottrends?sa=X&#038;date=#{date}"
  puts2 url

  begin
    body = getPage(url)
  rescue WWW::Mechanize::ResponseCodeError
    puts2 "Couldn't fetch URL. Invalid date..?"
    exit 5
  end

  puts2 "Fetched page (#{body.size} bytes)"

  if body['There is no data on date']
    puts2 'No data available for this date.'
    puts2 'Date might be too old or too early for report, or just invalid'
    exit 3
  end

  doc = Hpricot(body)

  (doc/"td[@class='hotColumn']/table[@class='Z2_list']//tr").each do |tr|
    td = (tr/:td)
    num = td[0].inner_text.sub('.','').strip
    kw = td[1].inner_text
    url = (td[1]/:a).first[:href]
    Keyword.find_or_new(kw) << Occurance.new(num, date, url)
  end
  puts "Got info on #{$master.size} keywords for #{date}"
  puts "keyword '#{$master.first.name}' occured #{$master.first.occurances} times"
end

class Occurance
  attr_accessor :pos, :date, :url
  def initialize(pos, date, url)
    @pos = pos
    @date = date
    @url = url
  end
end

# A search keyword together with every Occurance recorded for it.
# Each new Keyword appends itself to the global $master list.
class Keyword
  attr_accessor :name
  # Only the writer is generated: the reader is defined explicitly below
  # and returns a count rather than the list itself.
  attr_writer :occurances

  def initialize(name)
    @name = name
    @occurances = []
    @position_average = nil # nothing computes this yet; see pos_average
    @count = nil
    $master << self
  end

  # Returns the Keyword called +name+ from $master, or creates (and thereby
  # registers) a new one when none exists.
  def self.find_or_new(name)
    $master.find { |m| name == m.name } || Keyword.new(name)
  end

  # Records one more Occurance for this keyword.
  def << occurance
    @occurances << occurance
  end

  def occured_on? datetime
    raise 'implement me'
  end

  def occured_between? datetime
    raise 'implement me'
  end

  # Number of recorded occurrences. Filtering by +datetime+ is not
  # implemented and raises.
  def occurances datetime=nil
    raise 'implement me' if datetime
    @occurances.size
  end

  def occurances_between datetime
    raise 'implement me'
  end

  # Position (pos) of the most recently added Occurance, or nil when none
  # has been recorded.
  def pos_latest
    latest = @occurances.last
    latest && latest.pos
  end

  # Returns the stored average position, which stays nil because nothing
  # in this class assigns it.
  def pos_average
    @position_average
  end

  def pos_average_between datetime
    raise 'implement me'
  end
end

#   Instance= [num, date, url]
#   Keyword=[Instance, Intance, Instance]
#   Methods for keywords:
#   KW.occured_on? date
#   KW.occured_between? d1, d2
#   KW.occurances
#   KW.occurances_between? d1, d2
#   KW.pos_latest
#   KW.pos_average
#   KW.pos_average_between

#   KW has been on the top 100 list KW.occurances.size times
#   The #1 keywords for the month of January: Master.sort_by KW.occurances_between? Jan1,Jan31.pos_average_between Jan1,Jan31
#
#   Top keywords: sort by KW.occurances.size = N keyword was listed the most.
#   Top keywords for date D: Master.sort_by KW.occured_on (x).num

main
</pre>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2009/01/scraping-google-trends-with-mechanize-and-hpricot/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
		</item>
		<item>
		<title>AnimeCrazy Scraper Example Using Hpricot &amp; Mechanize</title>
		<link>http://biodegradablegeek.com/2009/01/animecrazy-scraper-example-using-hpricot-mechanize/</link>
		<comments>http://biodegradablegeek.com/2009/01/animecrazy-scraper-example-using-hpricot-mechanize/#comments</comments>
		<pubDate>Sun, 11 Jan 2009 20:44:40 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Automation]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Ruby]]></category>
		<category><![CDATA[Scraping]]></category>
		<category><![CDATA[Scripts]]></category>
		<category><![CDATA[Code example]]></category>
		<category><![CDATA[hpricot]]></category>
		<category><![CDATA[mechanize]]></category>
		<category><![CDATA[tuts]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=306</guid>
		<description><![CDATA[This is a little (as of now incomplete) scraper I wrote to grab all the anime video code off of AnimeCrazy (dot) net. This site doesn&#8217;t host any videos on its own server, but just embeds ones that have been uploaded to other sites (Megavideo, YouTube, Vimeo, etc). I don&#8217;t know who the original uploaders [...]]]></description>
			<content:encoded><![CDATA[<p>This is a little <em>(as of now incomplete)</em> scraper I wrote to grab all the anime video code off of AnimeCrazy (dot) net. This site doesn&#8217;t host any videos on its own server, but just embeds ones that have been uploaded to other sites (Megavideo, YouTube, Vimeo, etc). I don&#8217;t know who the original uploaders of the videos are, but I&#8217;ve seen this same collection of anime links being used on some other sites. This site has about 10,000 episodes/parts (1 movie may have 6+ parts). The scraper below was only tested with &#8220;completed anime shows&#8221; and got around 6300 episodes. The remaining content (anime movies and running anime shows) should work as-is, but I personally held off on getting those because I want to examine them closely to try cleaning up the inconsistencies as much as possible.</p>
<p>This scraper needs some initial setup and <strong>won&#8217;t work out of the box</strong>, but I&#8217;m including it here in the hopes that it will serve as a decent example of a small real world scraper, if you&#8217;re looking to learn the basics of scraping with <a href="http://redhanded.hobix.com/inspect/hpricot01.html">Hpricot</a> and Mechanize. Let me know if you find any use for it. I will update the posted code later this week when I have time to complete it and add some more features.</p>
<p>There&#8217;s one major problem with the organization of episodes on AnimeCrazy, and it&#8217;s the fact that some episodes are glued together into one post. Right now the scraper stops and asks you how to proceed when it comes across such a post. You basically need to tell the scraper if a post (page) contains 1 episode (video) or multiple. If there&#8217;s 1, it proceeds on its own, but if there&#8217;s two, it requires that you give it the names and links of each individual episode (part1 and part2 usually). Sometimes 2 episodes are together in 1 video. Sorta like those music albums on KaZaA or LimeWire that are basically ripped as one huge mp3 instead of individual songs.</p>
<p>This only accounts for maybe 30-40 out of 6000 videos, and it&#8217;s not that big of a deal because the amount of work needed to proceed with the scraping is small, but it IS work, and is a bitch slap to the entire concept of automation, but coding around the issue is a major hassle and there would still be a high chance that some inconsistencies will still come through. It would be far less work to just find another anime site which is far more consistent, though the reason animecrazy is good is because it&#8217;s active, and the site IS updated manually these days, as far as I can tell.</p>
<p>BTW, <strong><a href="http://whytheluckystiff.net/">Why The Lucky Stiff rocks</a>, and Hpricot is amazing.</strong> But the serious scrapologist should consider <a href="http://blog.labnotes.org/2006/07/11/scraping-with-style-scrapi-toolkit-for-ruby/">scrAPI</a> or <a href="http://scrubyt.org/">sCRUBYt</a> (uses Hpricot) for big projects.</p>
<pre lang="ruby">#!/usr/bin/env ruby
# License: Public domain. Go sell it to newbs on DigitalPoint.

require 'rubygems'
require 'hpricot'
require 'mechanize'
require 'tempfile'
require 'highline/import'
HighLine.track_eof = false

$mech = WWW::Mechanize.new
$mech.user_agent_alias = 'Mac Safari'

###############################
$skip_until = false
DEBUG=false
###############################

def debug?
  DEBUG
end

def puts2(txt='')
  puts "*** #{txt}"
end

#  Anime has: title, type (series, movie), series
#  Episode has name/#, description, parts (video code)

class Episode
  attr_accessor :name, :src, :desc, :cover
  def initialize(title, page)
    @src = page # parts (megavideo, youtube etc)
    @name = title
    @desc = nil # episode description
    @cover = nil # file path
  end
end

class Anime
  attr_accessor :name, :page, :completed, :anime_type, :episodes
  def initialize(title, page)
    @name = title
    @page = page
    @episodes = []
    @anime_type = 'series'
    @completed = false
  end

  def complete!
    @completed = true
  end

  def episode! episode
    @episodes &lt;&lt; episode
  end
end

class Cache
  def initialize
    # Setup physical cache location
    @path = 'cache'
    Dir.mkdir @path unless File.exists? @path

    # key/val = url/filename (of fetched data)
    @datafile = "#{@path}/cache.data"
    @cache = load @datafile
    #puts @cache.inspect
  end

  def put key, val
    tf = Tempfile.new('animecrazy', @path)
    path = tf.path
    tf.close! # important!

    puts2 "Saving to cache (#{path})"
    open(path, 'w') { |f|
      f.write(val)
      @cache[key] = path
    }

    save @datafile
  end

  def get key
    return nil unless exists?(key) &amp;&amp; File.exists?(@cache[key])
    open(@cache[key], 'r') { |f| f.read }
  end

  def exists? key
    @cache.has_key? key
  end

private
  # Load saved cache
  def load file
    return File.exists?(file) ? YAML.load(open(file).read) : {}
  end

  # Save cache
  def save path
    open(path, 'w') { |f|
      f.write @cache.to_yaml
    }
  end
end

$cache = Cache.new

def fetch(url)
  body = $mech.get(url).body()
  $cache.put(url, body)
  body
end

def getPage(url)
  # First let's see if this is cached already.
  body = $cache.get(url) 

  if body.nil?
    puts "Not cached. Fetching from site..."
    body = fetch url
  end
  body
end

def main
  # Open anime list (anime_list = saved HTML of
<ul>...</ul>

sidebar from animecrazy.net)
  anime_list = Hpricot(open('anime_list', 'r') { |f| f.read })
  puts2 "Anime list open"

  # Read in the URL to every series
  masterlist = []

  (anime_list/:li/:a).each do |series|
    anime = Anime.new(series.inner_text, series[:href])
    masterlist &lt;&lt; anime
    puts2 "Built structure for #{anime.name}..."
  end

  puts2

  puts2 "Fetched #{masterlist.size} animes. Now fetching episodes..."
  masterlist.each do |anime|
    puts2 "Fetching body (#{anime.name})"
    body = getPage(anime.page)
    puts2 "Snatched that bitch (#{body.size} bytes of Goku Goodness)"
    puts2

    doc = Hpricot(body)
    (doc/"h1/a[@rel='bookmark']").each do |episode|
      name = clean(episode.inner_text)

      if $skip_until
        #$skip_until = !inUrl(episode[:href], 'basilisk-episode-2')
        #$skip_until = nil == name['Tsubasa Chronicles']
        puts2 "Resuming from #{episode[:href]}" if !$skip_until
        next
      end

      # Here it gets tricky. This is a major source of inconsistencies in the site.
      # They group episodes into 1 post sometimes, and the only way to find
      # out from the title of the post is by checking for the following patterns
      # (7 and 8 are example episode #s)
      # X = 7+8, 7 + 8, 7 and 8, 7and8, 7 &amp; 8, 7&amp;8

      # If an episode has no X then it is 1 episode.
      # If it has multiple parts, they are mirrors.
      if single_episode? name
        begin

          puts2 "Adding episode #{name}..."
          ep = Episode.new(name, episode[:href])
          ep.src = getPage(episode[:href])
          anime.episode! ep
        rescue WWW::Mechanize::ResponseCodeError
          puts2 "ERROR: Page not found? Skipping..."
          puts name
          puts2 episode[:href]
        end
      else
        # If an episode DOES have X, it *may* have 2 episodes (but may have mirrors, going up to 4 parts/vids per page).
        # Multiple parts will be the individual episodes in chronological order.
        puts2 "Help me! I'm confused @ '#{name}'"
        puts2 "This post might contain multiple episodes..."

        puts2 "Please visit this URL and verify the following:"
        puts episode[:href]

        if agree("Is this 1 episode? yes/no ")
          begin
            puts2 "Adding episode #{name}..."
            ep = Episode.new(name, episode[:href])
            ep.src = getPage(episode[:href])
            anime.episode! ep
          rescue WWW::Mechanize::ResponseCodeError
            puts2 "ERROR: Page not found? Skipping..."
            puts name
            puts2 episode[:href]
          end
        else
          more = true
          while more
            ename = ask("Enter the name of an episode: ")
            eurl =  ask("Enter the URL of an episode: ")

            begin
              puts2 "Adding episode #{ename}..."
              ep = Episode.new(name, episode[:href])
              ep.src = getPage(episode[:href])
              anime.episode! ep
            rescue WWW::Mechanize::ResponseCodeError
              puts2 "ERROR: Page not found? Skipping..."
              puts name
              puts2 episode[:href]
            end
            more = agree("Add another episode? Y/N")
          end
          puts2 "Added episodes manually... moving on"
        end
      end
    end
    anime.complete!
    # XXX save the entire anime object, instead of just cache
  end
end

# Looks for +title+ in the text of the document's title element(s).
# The document only needs to answer "/" with something that has inner_text
# (presumably an Hpricot document -- confirm against the caller).
# Returns whatever indexing that text with +title+ yields; for a String
# needle that is the matched substring, or nil when it is absent.
def inTitle(document, title)
  heading_text = (document / :title).inner_text
  heading_text[title]
end

# Checks whether +part+ occurs in +url+ by indexing the URL with it.
# For String arguments the result is the matched substring, or nil when
# +part+ is not present.
def inUrl(url, part)
  url[part]
end

# True when +name+ does NOT look like a post that bundles several episodes.
# A bundle is recognised as: digit, optional space, a joiner ("+", an
# ampersand or the word "and"), optional space, digit -- e.g. "12 and 13".
def single_episode?(name)
  bundle_pattern = /[0-9] ?([+&amp;]|and) ?[0-9]/
  (name =~ bundle_pattern).nil?
end

# Strips a "final episode" marker from an episode title.
#
# +txt+ is modified in place and the same String object is returned.
# Each marker is removed at most once (its first occurrence). The variants
# with a leading space are tried first so that no stray space is left behind;
# the bare variants then catch titles such as "Foo(FINAL)".
#
# Fix: the bare "(FINAL)" and "(FINAL EPISODE)" removals used to be guarded
# by the leading-space form of the marker, so a marker without a preceding
# space was never removed.
#
# Known gap: other spellings, like *Final* or a plain "Final", are not handled.
def clean(txt)
  markers = ['(Final)', '(Final Episode)', '(FINAL)', '(FINAL EPISODE)']

  [' ', ''].each do |lead|
    markers.each do |marker|
      needle = lead + marker
      # String#[]= raises IndexError when the needle is absent, hence the guard.
      txt[needle] = '' if txt[needle]
    end
  end

  txt
end

main</pre>
<p>If you&#8217;re writing your own scraper and would like to use the minimal caching functionality present below, you can gut everything in main() out and put in your own code. Feel free to <a href="/contact">contact me for assistance</a>.</p>
<p>Here is some sample output:<br />
<span id="more-306"></span></p>
<pre lang="text">*** Adding episode Initial D: Episode 1 (Stage 2)...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090111-12300-mbdpcl-0)
*** Fetching body (Initial D: Third Stage)
*** Snatched that bitch (77695 bytes of Goku Goodness)
***
*** Adding episode Initial D: Third Stage...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090111-12300-ea69nr-0)
*** Fetching body (Kaiji)
*** Snatched that bitch (87553 bytes of Goku Goodness)
***
*** Adding episode Basilisk Episode 4...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090110-14992-fomoh0-0)
*** Adding episode Basilisk Episode 3...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090110-14992-1dx9xm-0)
*** Adding episode Basilisk Episode 2...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090110-14992-5xt774-0)
*** Adding episode Basilisk Episode 1...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090110-14992-br5fxd-0)
*** Adding episode Tsubasa Chronicles: Tokyo Revelations Episode 3...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090110-14992-zmuwix-0)
*** Adding episode Tsubasa Chronicles: Tokyo Revelations Episode 2...
Not cached. Fetching from site...
*** Saving to cache (cache/animecrazy20090110-14992-1ah20eg-0)
*** Adding episode Tsubasa Chronicles: Tokyo Revelations Episode 1...
Not cached. Fetching from site...</pre>
<p>This was written for fun, but primarily profit, and not for my own viewing pleasure. The only anime I&#8217;ve seen was Akira a decade or so ago, and only because the cover looked cool, but feel free to recommend your favorites.</p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2009/01/animecrazy-scraper-example-using-hpricot-mechanize/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>Quick BASH Script to Dump &amp; Compress a MySQL Database</title>
		<link>http://biodegradablegeek.com/2008/12/quick-bash-script-to-dump-compress-a-mysql-database/</link>
		<comments>http://biodegradablegeek.com/2008/12/quick-bash-script-to-dump-compress-a-mysql-database/#comments</comments>
		<pubDate>Sat, 27 Dec 2008 18:01:19 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Automation]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Projects]]></category>
		<category><![CDATA[Scripts]]></category>
		<category><![CDATA[SQL]]></category>
		<category><![CDATA[backups]]></category>
		<category><![CDATA[mysql]]></category>
		<category><![CDATA[sql]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=299</guid>
		<description><![CDATA[A quick script I whipped up to dump my MySQL database. Usage: sh backthatsqlup.sh (be warned that it dumps ALL databases. This can get huge uncompressed) #!/bin/sh # Isam (Biodegradablegeek.com) public domain 12/28/2008 # Basic BASH script to dump and compress a MySQL dump out=sequel_`date +'%m%d%Y_%M%S'`.sql dest=/bx/ function e { echo -e "n** $1" } [...]]]></description>
			<content:encoded><![CDATA[<p>A quick script I whipped up to dump my MySQL database.<br />
<strong>Usage: sh backthatsqlup.sh</strong></p>
<p><em>(be warned that it dumps ALL databases. This can get huge uncompressed)</em></p>
<pre lang="bash">#!/bin/sh
# Isam (Biodegradablegeek.com) public domain 12/28/2008
# Basic BASH script to dump and compress a MySQL dump

out=sequel_`date +'%m%d%Y_%M%S'`.sql
dest=/bx/

# Print a status banner: an empty line followed by "** MESSAGE".
#   $1 - the message text
# The listing had lost the backslash of the newline escape, so a literal "n"
# was printed in front of every banner. printf and the name() form are used
# because the shebang is /bin/sh, where "function" and "echo -e" are not
# guaranteed to exist. Unlike "echo -e", printf with %s prints backslashes in
# the message verbatim.
e() {
  printf '\n** %s\n' "$1"
}

# Step 1: dump every database into $out; give up with status 7 on failure.
e "Dumping SQL file ($out). May take awhile..."
if ! sudo mysqldump -u root -p --all-databases &gt; $out; then
  e "MySQL dump failed. Check that server is up and your username/pass"
  exit 7
fi

# Step 2: report how large the raw dump is.
e "Uncompressed SQL file size"
du -hs $out

# Step 3: pack the dump into a gzipped tarball next to it.
e "Compressing SQL file"
gz=$out.tar.gz
tar -zvvcf $gz $out
rt=$?

if [ $rt -eq 0 ]; then
  # The archive is good: drop the raw dump and carry on with the archive.
  e "Removing uncompressed SQL file"
  rm -f $out
  out=$gz

  e "Compressed SQL file size"
  du -hs $out
else
  # Keep the raw dump so nothing is lost; $out still points at it.
  e "tar failed (error=$rt). Will NOT remove uncompressed SQL file"
fi

e "Moving shit to '$dest'"
sudo mv $out $dest</pre>
<p><a href="http://code.biodegradablegeek.com/backthatsqlup.sh">BackThatSqlUp.sh</a></p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2008/12/quick-bash-script-to-dump-compress-a-mysql-database/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		</item>
		<item>
		<title>Refactoring Tip: Eliminating Model.find(params[:id]) Duplication</title>
		<link>http://biodegradablegeek.com/2008/12/refactoring-tip-eliminating-modelfindparamsid-duplication/</link>
		<comments>http://biodegradablegeek.com/2008/12/refactoring-tip-eliminating-modelfindparamsid-duplication/#comments</comments>
		<pubDate>Sun, 07 Dec 2008 15:59:36 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Code]]></category>
		<category><![CDATA[Ruby on Rails]]></category>
		<category><![CDATA[Snippets]]></category>
		<category><![CDATA[Tips]]></category>
		<category><![CDATA[rails]]></category>
		<category><![CDATA[refactoring tips]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=293</guid>
		<description><![CDATA[In a controller, you&#8217;ll commonly have a method that requires you have an instance variable containing the object you&#8217;re working with. An example would be the show, edit, update, and destroy methods (REST). To eliminate having find(params[:id]) in multiple methods, you can use before_filter, like this: class Admin::PostsController < Admin::ApplicationController before_filter :find_post, :only => [:show, [...]]]></description>
			<content:encoded><![CDATA[<p>In a controller, you&#8217;ll commonly have a method that requires you have an instance variable containing the object you&#8217;re working with. An example would be the <strong>show</strong>, <strong>edit</strong>, <strong>update</strong>, and <strong>destroy</strong> methods (REST).</p>
<p>To eliminate having find(params[:id]) in multiple methods, you can use before_filter, like this:</p>
<pre lang="rails">
# Example admin controller for the refactoring described above: the four
# actions that work on an existing post share a single lookup (find_post)
# instead of each one calling Post.find(params[:id]) itself.
class Admin::PostsController < Admin::ApplicationController
  # Run find_post before these four actions only; index, new and create
  # do not operate on an existing record.
  before_filter :find_post, :only => [:show, :edit, :update, :destroy]
  # Whenever ActiveRecord::RecordNotFound is raised in this controller
  # (presumably by the lookup in find_post for an unknown id), answer with a
  # plain "Post not found" heading.
  # NOTE(review): the line breaks inside the string below look like an
  # artifact of the blog's formatting -- confirm against the original code.
  rescue_from(ActiveRecord::RecordNotFound) { |e| render :text => "
<h2>Post not found</h2>

" }

  def index
    @posts = Post.find(:all)
  end

  def show
    # @post has already been set by the find_post filter.
  end

  def new
    @post = Post.new
  end

  def create
    # NOTE(review): no form params are passed to Post.new and nothing is
    # saved here -- presumably trimmed down for the example.
    @post = Post.new
  end

  def edit
    # @post has already been set by the find_post filter.
  end

  def update
    # @post has already been set by the find_post filter.
  end

  def destroy
    # @post has already been set by the find_post filter.
  end

protected
  # Loads the post into @post. +id+ defaults to the :id request parameter,
  # which is what the before_filter relies on.
  def find_post(id = params[:id])
    @post = Post.find(id)
  end
end
</pre>
<p>(Thanks Jon)</p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2008/12/refactoring-tip-eliminating-modelfindparamsid-duplication/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Using Javascript to Populate Forms During Development</title>
		<link>http://biodegradablegeek.com/2008/12/using-javascript-to-populate-forms-during-development/</link>
		<comments>http://biodegradablegeek.com/2008/12/using-javascript-to-populate-forms-during-development/#comments</comments>
		<pubDate>Sun, 07 Dec 2008 15:43:41 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Automation]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Ruby on Rails]]></category>
		<category><![CDATA[Snippets]]></category>
		<category><![CDATA[javascript]]></category>
		<category><![CDATA[jquery]]></category>
		<category><![CDATA[rails]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=291</guid>
		<description><![CDATA[During development, working with forms quickly gets annoying because you have to constantly fill in each field, sometimes with unique info. One way around this is to write a little Javascript code that just populates the fields. I use something like this on the bottom of the form. I had jQuery no-conflict mode on in [...]]]></description>
			<content:encoded><![CDATA[<p>During development, working with forms quickly gets annoying because you have to constantly fill in each field, sometimes with unique info. One way around this is to write a little Javascript code that just populates the fields. I use something like this at the bottom of the form. I had jQuery no-conflict mode on in this case. In your app you might be able to get away with replacing _j() with $():</p>
<pre lang="rails">
<% if ENV['RAILS_ENV']=='development' -%>
<!-- Development only: pre-fill the form so it does not have to be typed in on every reload -->
<script type="text/javascript">
  // Runs once the DOM is ready.
  jQuery(function() {
    // Fall back to plain jQuery when the no-conflict alias _j has not been set up.
    if (typeof(_j)=='undefined') _j = jQuery;

    // Random integer in the range 0..99998, used to keep the unique fields unique.
    function randstr() {
      return Math.floor(Math.random()*99999)
    }

    _j('#name').val('Chuck Norris');
    _j('#company_name').val('Roundhouse LLC');
    _j('#email').val('moo'+randstr()+'@yawhoo.com');
    _j('#login').val('user'+randstr());
    _j('#password').val('admin');
    _j('#password_confirmation').val('admin');
  })
</script>
<% end -%>
</pre>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2008/12/using-javascript-to-populate-forms-during-development/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>Learning to Read and Grok Other People&#8217;s Code</title>
		<link>http://biodegradablegeek.com/2008/11/learning-to-read-and-grok-other-peoples-code/</link>
		<comments>http://biodegradablegeek.com/2008/11/learning-to-read-and-grok-other-peoples-code/#comments</comments>
		<pubDate>Sat, 15 Nov 2008 05:21:08 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Code]]></category>
		<category><![CDATA[Productivity]]></category>
		<category><![CDATA[Tips]]></category>
		<category><![CDATA[habits]]></category>
		<category><![CDATA[motivation]]></category>
		<category><![CDATA[open source]]></category>
		<category><![CDATA[oss]]></category>
		<category><![CDATA[persistence]]></category>
		<category><![CDATA[reading code]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=269</guid>
		<description><![CDATA[One reason many people don&#8217;t contribute to open source apps is because they find it daunting to look through somebody else&#8217;s code. Some might even think that it&#8217;s just simpler to write something from scratch than to study someone&#8217;s work. This isn&#8217;t true, and reading foreign code is something get used to and excel at [...]]]></description>
			<content:encoded><![CDATA[<p>One reason many people don&#8217;t contribute to open source apps is because they find it daunting to look through somebody else&#8217;s code. Some might even think that it&#8217;s just simpler to write something from scratch than to study someone&#8217;s work. This isn&#8217;t true, and reading foreign code is something you get used to and excel at over time. It&#8217;s a necessary skill for every programmer, and has many benefits.</p>
<p>A huge benefit is the massive amount of information you learn and get accustomed to in a short period of time. There&#8217;s no way to download O&#8217;Reilly PDFs into your brain just yet, but grokking source code written by those much more experienced than you is one of the fastest ways to see and practice everything you&#8217;ve been learning in theory (books, sites, classes).</p>
<p>It&#8217;s certainly overwhelming to jump head first into a huge app trying to understand every line. I think it&#8217;s common for people to open up some code, read it for a few minutes and then never touch it again because they don&#8217;t understand it. This was the case with me when I began programming. Here are some ways I used to justify putting off the need to read third party code.</p>
<p>Their code style didn&#8217;t suit my taste, i.e., they add the opening curly bracket under the function definition, and I would find myself changing their brackets and formatting more than I spent time actually looking at the logic.</p>
<p>I told myself I would learn much more by re-inventing the wheel, or have more control over my app if I built it from scratch. This is only partially true, but the cons outweigh the pros. Reinventing the wheel means deviating from writing program logic and having to learn something that might not even remotely be related to the project I intended to start or finish. Here&#8217;s an example that used to be common.</p>
<p><span id="more-269"></span></p>
<p>I would happily hack away and then realize that I need a string library (<strong>I &lt;3 C</strong>). At this point I could have downloaded one, and continued hacking at my app. But instead I would start a new project aimed at writing an efficient string library. Before I knew it, I&#8217;m hours into my string lib and totally abandoned my main project. I usually  anyway. The funny thing is that sometimes I&#8217;ll just give up and use a third party lib, but most of the time I just ended up scrapping both projects. What a waste of time.</p>
<p>Everyone has their own way of learning other people&#8217;s code. I found that what works best for me is to download an app and just browse the source indifferently. It&#8217;s not good, it&#8217;s not bad, it&#8217;s not ugly or inefficient or godly. I just try to get a basic idea of where everything is, what plugins are being used, etc. Layout stuff is in this general area, settings here, plugins are all in there, etc. This step is probably useless and I only do it as a guilt-free way to avoid working for 10 minutes. Or, maybe it does help.</p>
<p>Then I begin by setting up the environment. Put in my DB settings, make sure I have all the requirements installed, etc. Logs are your best lead at this point. I usually open up a fullscreen console and tail -f the log file(s), then watch them like a hawk every time you shift a pixel in the code. It&#8217;s not as tedious as it sounds.</p>
<p>Another thing that helps is knowing the language/framework the code is written in. You should know at least how to read the language &#8211; though if you&#8217;re dedicated, I&#8217;m sure you can muddle through with a crash course/reference sheet. By knowing the lang/framework, you can find or create some functions to help you debug and examine objects (even just <strong>raise object.inspect</strong> is great) in detail. You can also examine other aspects of the code. One thing I do early on is <strong>rake routes &gt; ROUTES</strong>. This saves all the routes to a file which I can glance at if necessary. And <em>rake routes</em> takes forever on my ancient machine.</p>
<p>After this, the code is compiled/started and I&#8217;m <em>ready</em> when I can interact with it live. Now I begin actually reading the code. If I have a specific update I want to make, be it adding a new feature, fixing a bug, or seeing how something works, I work backwards from the live app tracing everything going on around the component I&#8217;d like to edit/update.</p>
<p>This is not easy, not quick, and very error prone when you&#8217;re making updates. Things breaking == You learning, and all you need to do is <strong>be persistent</strong>. It&#8217;s like reading a calculus book. Nothing will make sense until you turn over the page and see how the problems and answers are laid out, and begin doing them on your own. It&#8217;s a great feeling when 2-3 days later you realize that you have a pretty good view of the app&#8217;s design (unless it&#8217;s something like the Kernel).</p>
<p>No matter how cryptic the code looks, don&#8217;t be turned off. Is there some sort of force that will completely stop you from understanding what the logic says? Can other people look at this code and instantly tell you what it&#8217;s doing? (well, they can form a great guess if they have the experience). It&#8217;s time consuming but the concepts apply to other things in life. You can understand how to change your oil without knowing how the engine works, or how to make simple CSS/HTML changes to a layout to change the background or font size without knowing every single CSS tag and technique.</p>
<p><strong>define:persistent: </strong><em>Refusing to give up, especially when faced with opposition or difficulty; continuing firmly or steadily</em></p>
<p>Most of my favorite quotes are about it. It&#8217;s unfortunate my memory isn&#8217;t very persistent.</p>
<p style="text-align: center;"><img class="size-full wp-image-271 aligncenter" title="Persistence is the key to money, fame, power and sex." src="http://biodegradablegeek.com/wp-content/uploads/2008/11/persistenceposter.jpg" alt="persistenceposter Learning to Read and Grok Other Peoples Code" width="400" height="336" /></p>
<p style="text-align: center;">
<h2 style="text-align: left;"><span style="color: #ff6600;">Do you have any special techniques/advice for learning to read third party code?</span></h2>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2008/11/learning-to-read-and-grok-other-peoples-code/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Script to Quickly Setup WebApp Environment and Domain</title>
		<link>http://biodegradablegeek.com/2008/10/script-to-quickly-setup-webapp-environment-and-domain/</link>
		<comments>http://biodegradablegeek.com/2008/10/script-to-quickly-setup-webapp-environment-and-domain/#comments</comments>
		<pubDate>Sun, 12 Oct 2008 04:10:32 +0000</pubDate>
		<dc:creator>Isam</dc:creator>
				<category><![CDATA[Automation]]></category>
		<category><![CDATA[Code]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Ruby]]></category>
		<category><![CDATA[code]]></category>
		<category><![CDATA[deployment]]></category>
		<category><![CDATA[joomla]]></category>
		<category><![CDATA[phpmotion]]></category>
		<category><![CDATA[rio]]></category>
		<category><![CDATA[scripting]]></category>
		<category><![CDATA[Scripts]]></category>
		<category><![CDATA[warez]]></category>

		<guid isPermaLink="false">http://biodegradablegeek.com/?p=240</guid>
		<description><![CDATA[Just sharing a script I wrote to quickly deploy WordPress (and eventually a few other webapps) sites, which somebody might find useful. This uses Linode&#8216;s API* to add the domain name to the DNS server along with some subdomains. If you&#8217;re using another server, (Slicehost, your own, etc), you can alter the dns class to [...]]]></description>
			<content:encoded><![CDATA[<p>Just sharing a script I wrote to quickly deploy WordPress (and eventually a few other webapps) sites, which somebody might find useful. This uses <a href="http://linode.com">Linode</a>&#8216;s API* to add the domain name to the DNS server along with some subdomains. If you&#8217;re using another server, (Slicehost, your own, etc), you can alter the dns class to use that API, or just ignore the DNS stuff completely; It&#8217;s optional.</p>
<p>This will be updated periodically as I refactor and add support for more apps (notably Joomla and Clipshare &#8211; though this would violate their terms unless you have the unlimited license). This was written primarily because I couldn&#8217;t stand setting up another vhost and WordPress installation. There are plenty of existing deployers but I plan on adding very specific features and tweaking this for in-house work. I also wanted to try Rio (Ruby-IO). GPL license. Go nuts.</p>
<p><em>* As of 10/11, the apicore.rb file on the site has some syntactic errors in the domainResourceSave method. I sent an email out to the author about it. Problems aren&#8217;t major. You can get <a href="http://biodegradablegeek.com/wp-content/uploads/2008/10/apicore.rb" target="_new">my apicore.rb here</a>.</em></p>
<p>This won&#8217;t run unless you create the appropriate folder structure in /etc/mksite/. I&#8217;ll get going on this in a bit. See the code below:</p>
<p><span id="more-240"></span></p>
<pre lang="ruby">#!/usr/bin/env ruby 

########################################################
# Isam M.
# http://biodegradablegeek.com
# MKSITE.RB (0.2) Last updated Oct 19th, 2008
#
# mksite makes it quicker to setup sites and web
# apps by doing most of the tedious work.
#
#
#                     UNSTABLE
#           Run it in your imagination,
#              not on your system!
#
#
######################################################## 

require 'rubygems'
require 'rio'
require 'yaml'
require 'mysql'
require 'highline/import'
HighLine.track_eof = false

begin
  require 'apicore'
rescue LoadError
  puts "NOTICE: Unable to load apicore.rb - domains will not be added automatically"
end

# Directory the generated vhost file has to be moved to by hand
# (see the closing message of App#serverSetup).
Apache_sites = '/etc/apache2/sites-available/'
# Names App#dnsSetup registers under the new domain. The last entry is "dev"
# plus one random digit (0-9), picked once when the script is loaded.
# NOTE(review): the empty string presumably stands for the bare domain -- confirm.
Subdomains = ['', 'www', 'mail', 'blog', "dev#{(rand*10).floor}"]
# Only referenced from commented-out code in this part of the script.
Testing = false
# Home directory and login name, taken from the environment.
Homedir = ENV['HOME']
Username = ENV['USER']
# Application choices. Note the mix of symbols and one plain string, and that
# :Wordpress is spelled differently from the WordPress class below.
# NOTE(review): confirm how these entries are mapped to classes.
Applications = [:Skeleton, :Clipshare, :Joomla, :PHPMotion, 'PmWiki (N/A)', :Wordpress]
# Globals that start out unset; $log is only mentioned in the disabled
# logging line of say.
$config = nil
$log = nil

# Pass-through to the +say+ this definition shadows (presumably the one
# provided by highline/import). It exists as a hook for logging, which is
# currently switched off:
#  $log &lt;&lt; txt if $log
def say(txt)
  super(txt)
end

# flash('message', :notice || :error) to output msg
# Prints a status message in one of three styles.
#
# msg          - the text to show
# message_type - :notice (default) prints "INFORMATION: ..." through say;
#                :emphasize prints "NOTICE: ..." through say, framed by two
#                rows of asterisks;
#                :error writes "FATAL ERROR: ..." to standard error.
#                Any other value prints nothing.
#
# The listing had lost the backslash of the "\n" escapes, so a literal "n"
# was appended to :emphasize and :error messages; the escapes are restored.
def flash(msg, message_type = :notice)
  case message_type
  when :notice
    say("INFORMATION: #{msg}")
  when :emphasize
    say("***************************************************")
    say("NOTICE: #{msg}\n")
    say("***************************************************")
  when :error
    # $stderr rather than the STDERR constant, so redirected error output is honoured.
    $stderr.puts("FATAL ERROR: #{msg}\n")
  end
end

# DZone 2111
# Returns a random string of +size+ characters (default 64) drawn from
# 0-9, A-Z and a-z. A +size+ of zero or less yields an empty string.
#
# The result is substituted for the _MKS_SECRET_ placeholder in generated
# application configs, so the characters are picked with SecureRandom
# instead of the predictable Kernel.rand generator.
def genAlpha(size=64)
  require 'securerandom' # standard library; loaded here so no file-level require is needed
  alphabet = ('0'..'9').to_a + ('A'..'Z').to_a + ('a'..'z').to_a
  (1..size).map { alphabet[SecureRandom.random_number(alphabet.size)] }.join
end

if defined? ApiCore
  class DNSAPI &lt; ApiCore
    # This depends on the Linode API Ruby bindings (apicore.rb)
    def initialize(key, debug=false, batching=false)
      super
      @batching=false
    end

    # Add domain. Return ID on success, nil on failure
    def addDomain domain
      return nil if !domain
      params = {}
      params[:DomainID] = 0
      params[:Domain] = domain
      params[:Type] = 'master'
      params[:Status] = 1
      params[:SOA_Email] = getVal('email') || ask("Enter SOA email for the domain: ")
      domainSave params
    end

    def addDomainResource(domain, resource, target, record_type = 'A')
      return nil if (did = getDomainIdByName domain).nil?
      params = {}
      params[:ResourceID] = 0
      params[:DomainID] = did
      params[:Name] = resource
      params[:Type] = record_type
      params[:Target] = target
      domainResourceSave params
    end

    def getDomainIdByName domain
      domainList.find do |dom|
        return dom["DOMAINID"] if dom["DOMAIN"].downcase == domain.downcase
      end
    end
  end
else
  flash('ApiCore not loaded. Skipping DNS stuff', :notice)
end

# Base class for every installable application. It holds the steps shared by
# all installers (directory skeleton, database, DNS, Apache vhost); concrete
# applications subclass it and implement setup.
#
# Changes compared with the published listing:
# * the API-key prompt nested single quotes inside a single-quoted string,
#   which does not parse; it is now double-quoted
# * "\n" escapes that had lost their backslash are restored
# * a rescue that re-raised immediately (followed by an unreachable flash)
#   is removed; errors propagate exactly as before
# * serverSetup really falls back to the 'generic' vhost template when a
#   subclass defines none
class App
  # Per-class metadata. These are class-level instance variables, so every
  # subclass has to assign its own; they are not inherited.
  class &lt;&lt; self; attr_reader :message, :name, :version, :description, :vhost; end
  @message = nil
  @name = nil
  @version = nil
  @description = 'Just Another Web App'
  @vhost = 'generic'

  # rootdir - directory the site is installed into (one trailing slash is dropped)
  # domain  - domain name of the site
  # db      - hash of database info; the keys 'host', 'user', 'pass' and
  #           'name' are read by databaseSetup
  def initialize(rootdir, domain, db)
    flash("Initializing application (dir=#{rootdir}, domain=#{domain})")
    @rootdir = rootdir.chomp('/')
    @domain = domain
    @db = db # hash of database info

    # System stuff: where templates, config templates and vhost templates live
    @templates = '/etc/mksite/templates'
    @configs = '/etc/mksite/configs'
    @vhosts = '/etc/mksite/vhosts'
  end

  # This generally does not need to be overriden.
  # It does 'generic shit' like creating the rootdir
  # and setting permissions
  def envSetup
    if !rio(@rootdir).exist?
      flash("Creating directory #{@rootdir}")
      rio(@rootdir).mkpath
    end

    rio(@rootdir).chdir do |root|
      # Copy the generic public/private/log apache
      # structure to rootdir
      flash("Changing working dir to #{@rootdir}")
      flash("Working inside '#{root.to_s}'")
      # Existing files are overwritten without asking.
      rio(@templates,'skeleton.www').each { |df| df &gt; root }

      # Set permissions (a+w on logs, etc)
      flash('Setting permissions...')
      flash('666 ./log/*.log')
      rio('./log/access.log').chmod(00666)
      rio('./log/error.log').chmod(00666)
      flash('700 ./private')
      rio('./private/').chmod(00700)
    end
  end

  # Template method: subclasses combine envSetup, databaseSetup, dnsSetup and
  # serverSetup as they need. The base implementation always raises.
  def setup
    raise 'OVERRIDE ME'
  end

  # Create database if it doesn't already exist.
  # This usually doesn't need to be overriden.
  # Exits the script with status 1 when the server reports an error.
  def databaseSetup
    begin
      dbo = Mysql.real_connect(@db['host'], @db['user'], @db['pass'])
      flash("Creating database '#{@db['name']}'")
      # The database name is interpolated into the statement as-is.
      res = dbo.query("CREATE DATABASE IF NOT EXISTS #{@db['name']};")
      flash("Database server returned #{res}") if res
    rescue Mysql::Error =&gt; err
      flash('Unable to connect to access/create database', :error)
      flash("Error returned (#{err.errno}) = '#{err.error}'", :error)
      exit 1
    ensure
      # dbo is nil when the connection itself failed.
      dbo.close if dbo
    end
  end

  # Setup DNS - currently uses Linode API. Does nothing when ApiCore was not
  # loaded. The API key and target IP come from getVal or, failing that, from
  # a prompt; answering 'skip' to either prompt skips that part.
  def dnsSetup
    return if !defined? ApiCore

    flash("Setting up DNS for '#{@domain}'")
    flash("This requires a Linode API key\nLogin to linode.com and find it under 'My Profile'", :emphasize)
    dns = nil
    api_key = getVal('apikey')
    loop do
      api_key = ask("Paste your Linode API key (or 'skip'): ") if api_key.nil? || api_key.empty?
      break if api_key.downcase.eql? 'skip'

      # Check API key by listing the domains; a RuntimeError means try again
      begin
        dns = DNSAPI.new api_key
        dns.domainList
        break
      rescue RuntimeError
        flash("API key invalid (or service down?). Learn to paste and try again.\n")
        api_key = nil
      end
    end

    return if api_key.nil? or api_key.downcase.eql? 'skip'

    flash("Adding master domain '#{@domain}'")
    begin
      dns.addDomain(@domain)
    rescue RuntimeError
      # Not fatal: the zone may already exist, so the subdomains are still attempted.
      flash('Unable to add domain (exists already?). Attempting to add subdomains...', :error)
    end

    server = getVal('server') ||
               ask("Enter IP subdomains should point to (or 'skip'): ") { |q| q.default = 'skip' }

    # Any error raised below propagates to the caller, as it did before.
    unless server.downcase.eql? 'skip'
      Subdomains.each do |sub|
        flash("Adding subdomain '#{sub}.#{@domain}' (points to #{server})")
        flash('Could not add subdomain', :error) if !dns.addDomainResource(@domain, sub, server)
      end
    end
  end

  # Fills in the vhost template for this application and writes the result
  # to /tmp/DOMAIN, readable by the owner only. Moving it into Apache's
  # sites directory and enabling the site is left to the user.
  def serverSetup
    # Subclasses that set no @vhost use the generic template.
    vhost_file = self.class.vhost || 'generic'
    flash("Generating vhost file (#{@vhosts}/#{vhost_file}) for Apache 2.x")
    vhost = ''
    email = getVal('email') || ask("Enter a valid email for tech support: ")
    rio(@vhosts, vhost_file) &gt; vhost
    vhost.gsub!('_MKS_DOMAIN_', @domain)
    vhost.gsub!('_MKS_EMAIL_', email)
    vhost.gsub!('_MKS_ROOT_', "#{@rootdir}")
    vhost.gsub!('_MKS_PUBLIC_', "#{@rootdir}/public")
    rio('/tmp/', @domain).puts(vhost)

    # Your enemies should not read this
    rio('/tmp/', @domain).chmod(00600)
    flash("vhost file has been generated as /tmp/#{@domain}\n
          It is YOUR responsibility to move this to #{Apache_sites}\n
          Site will not work until you 'a2ensite &amp;&amp; apache2ctl restart'", :emphasize)
  end

  # Final message once everything is in place; subclasses may override it.
  def postInstall()
    flash("Finished!", :emphasize)
  end
end

# Installer for a bare site: directory layout, DNS and vhost, but no
# application files and no database.
class Skeleton &lt; App
  @name = 'Skeleton'
  @description = 'Generic WWW directory structure'
  @message = ''
  @vhost = 'generic'

  # Runs the environment, DNS and Apache vhost steps, in that order.
  def setup
    envSetup
    dnsSetup
    serverSetup
  end
end

# Installer for WordPress: copies the bundled WordPress tree into
# ROOTDIR/public, writes wp-config.php from the config template and then runs
# the shared database, DNS and vhost steps.
class WordPress &lt; App
  @name = 'Wordpress'
  @version = '2.6.2'
  @description = 'A popular blogging platform'
  @message = 'Trying to make monies on the Internets?'
  @vhost = 'generic'

  # Exits the script with status 2 when the config template is missing or
  # unreadable.
  def setup
    # This sets up the initial environment / permissions
    self.envSetup()

    # Copy the wordpress skeleton directory to the new dir
    wproot = rio(@rootdir,'public')
    flash("Wordpress root will be #{wproot}")
    flash('Copying WordPress data over... (may take awhile)')
    rio(@templates,'wordpress').each { |df| df &gt; wproot }

    rio(wproot).chdir do
      rio('wp-config-sample.php').rm()
      # Generate and output wp config
      flash('Generating wp-config.php based on your DB settings...')
      wpcfg = rio(@configs, 'wordpress.cfg')
      if !wpcfg.exist? || !wpcfg.readable?
        flash("Wordpress config template missing or unreadable, quitting", :error)
        exit 2
      end

      # Copy the config into a string, do things with it and then write it to disk
      config = ''
      rio(@configs, 'wordpress.cfg') &gt; config
      flash("Warning: config file '#{@configs}/wordpress.cfg' is empty") if config.empty?

      # Substitute with gsub! and a block rather than config[key] = value:
      # * a placeholder missing from the template (for instance the empty
      #   template warned about above) no longer raises IndexError
      # * every occurrence is replaced, not just the first
      # * the block form inserts the value literally, so backslashes in a
      #   password are not read as backreferences
      [['_MKS_DB_HOST_', 'host'], ['_MKS_DB_USER_', 'user'],
       ['_MKS_DB_PASS_', 'pass'], ['_MKS_DB_NAME_', 'name']].each do |placeholder, key|
        config.gsub!(placeholder) { @db[key].to_s }
      end
      # The block is evaluated per match, so each secret placeholder in the
      # template receives its own random value.
      config.gsub!('_MKS_SECRET_') { genAlpha() }
      flash("Writing #{wpcfg} data")
      rio('wp-config.php').w!.puts(config)
    end

    # Setup the database
    self.databaseSetup()

    # Add DNS info
    self.dnsSetup()

    # Setup the server/vhost
    self.serverSetup()
  end
end

# Placeholder installer: Clipshare is listed but not implemented yet.
class Clipshare &lt; App
  # Always aborts with a RuntimeError saying the feature is unavailable.
  def setup
    raise RuntimeError, 'Clipshare support is currently not available. sowwie'
  end
end

# Installer for a Joomla site. Copies the bundled Joomla 1.5.7 tree into
# <rootdir>/public, renders configuration.php from the joomla.cfg template,
# then runs the database, DNS and vhost steps (envSetup, databaseSetup,
# dnsSetup and serverSetup are not defined in this class).
class Joomla &lt; App
  @name = 'Joomla'
  @version = '1.5.7'
  @description = 'A widely used Content Management System'
  @message = ''
  @vhost = 'generic'

  # Performs the full install. Exits the process with status 2 when the
  # joomla.cfg template is missing or unreadable.
  def setup
    # This sets up the initial environment / permissions
    self.envSetup()

    # Copy the Joomla skeleton directory to the new dir
    flash('Copying Joomla data over... (may take awhile)')
    jooroot = rio(@rootdir,'public')
    flash("Joomla root will be #{jooroot}")
    rio(@templates,'joomla-1.5.7').each { |df| df &gt; jooroot }

    rio(jooroot).chdir do
      flash('Removing stock Joomla config...')
      rio('configuration.php-dist').rm()

      # Generate and output joomla config
      flash('Generating configuration.php based on provided settings...')
      joocfg = rio(@configs, 'joomla.cfg')
      if !joocfg.exist? || !joocfg.readable?
        flash("Joomla config template missing or unreadable, quitting", :error)
        exit 2
      end

      # Copy the config into a string, do things with it and then write it to disk
      config = ''
      joocfg &gt; config
      flash("Warning: config file '#{@configs}/joomla.cfg' is empty") if config.empty?

      # Replace the first occurrence of each DB placeholder. The block form of
      # sub! inserts the value literally (no backslash interpretation) and,
      # unlike String#[]=, does not raise IndexError when the placeholder is
      # absent from the template.
      [['_MKS_DB_HOST_', @db['host']],
       ['_MKS_DB_USER_', @db['user']],
       ['_MKS_DB_PASS_', @db['pass']],
       ['_MKS_DB_NAME_', @db['name']]].each do |placeholder, value|
        unless config.sub!(placeholder) { value.to_s }
          flash("Warning: placeholder #{placeholder} not found in '#{@configs}/joomla.cfg'")
        end
      end
      config.gsub!('_MKS_DOMAIN_') { @domain.to_s }
      # Block form so genAlpha() is called once per occurrence.
      config.gsub!('_MKS_SECRET_') { genAlpha() }

      flash("Writing ./configuration.php")
      rio('configuration.php').w!.puts(config)

      # NOTE(review): 0666 leaves a file holding DB credentials world-writable.
      # Presumably this is so the Joomla web installer can rewrite it; tighten
      # it after installation if that is the case.
      flash("Making configuration.php world writable (0666)")
      rio('configuration.php').chmod(0666)
    end

    # Setup the database
    self.databaseSetup()

    # Add DNS info
    self.dnsSetup()

    # Setup the server/vhost
    self.serverSetup()
  end

  # Prints the follow-up instructions for the web based installer, then
  # defers to the parent class's postInstall.
  def postInstall()
    flash('Joomla is ready to be setup using the web interface', :emphasize)
    flash("Go to #{@domain} where the Joomla! web based installer will
           guide you through the rest of the installation")
    flash("Here's the database information:\n
           USERNAME: #{@db['user']}\n
           DB HOST : #{@db['host']}\n
           DB NAME : #{@db['name']}")

    flash("\nAdmin panel is located @ #{@domain}/administrator ")
    flash("You can log into Admin using the username 'admin' along with the
    password that was generated or you chose during the web based install.")
    super
  end
end

# Placeholder entry for PHPMotion; no install procedure exists yet.
class PHPMotion &lt; App
  # Always aborts with a RuntimeError explaining the app is unsupported.
  def setup
    raise RuntimeError, 'PHPMotion support is currently not available. sowwie'
  end
end

# Small log writer backed by a rio file object.
class Log
  # Picks a log path that does not exist yet (filename, then filename.1,
  # filename.2, ...), writes a header line, restricts the file to mode 0600
  # and announces the path via flash.
  #
  # filename - base path of the log file; defaults to a per-user file in /tmp.
  def initialize(filename="/tmp/#{Username}_mksite.log")
    @log = rio(filename)
    n = 0
    @log = rio("#{filename}.#{n += 1}") while @log.exist?
    @log.puts("# GENERATED BY MKS - BEGAN @ #{Time.now.to_i}")
    @log.chmod(0600)
    flash("Log file generated (important): #{@log.to_s} (no worries, set to 600)", :emphasize)
  end

  # append to log
  #
  # Writes data followed by a blank separator line. Returns self so that
  # appends can be chained (log &lt;&lt; a &lt;&lt; b).
  def &lt;&lt;(data)
    @log.puts(data)
    @log.puts("\n")
    self
  end
end

# Reads the per-user settings file ~/.mksite.
#
# Returns whatever YAML.load produces for the file's contents, or nil when
# the file does not exist.
def loadConfig()
  dotfile = "#{ENV['HOME']}/.mksite"
  if rio(dotfile).exist?
    YAML.load(rio(dotfile).read())
  else
    nil
  end
end

# Looks up a setting in the global $config hash.
#
# key     - the settings key to look up.
# default - value returned when no config is loaded or the key is absent.
#
# Returns the stored value (including an explicit false or nil) when the key
# is present, otherwise default. The previous `defined? $config[key]` test
# was always truthy once $config was set, so default was never honoured for
# a missing key.
def getVal(key, default=nil)
  return default unless $config.is_a?(Hash)
  $config.fetch(key, default)
end

def main
  exit 1 if 'root'==ENV['USER']
  if ($config = loadConfig())
    flash('~/.mksite config loaded')
  else
    flash('~/.mksite not found. It's fine, I'll annoy you with questions.')
  end

  puts $config.inspect if Testing
  flash('Press ^C (CTRL+C) at any time quit', :emphasize)
  # Let's ask neutral questions about the new site.
  if Testing
    rootdir = '/tmp/sandbox9/'
    domain = 'domain.cxm'
  else
    domain = ask('Enter site's domain name (no http:// or www): ') #{ |d| d.validate = !! /^www.|^http:/// }
    rootdir = ask('Root site directory (leave blank for default): ') { |q|
      q.default = "/home/#{Username}/www/#{domain}/"
      q.validate = /^/home/#{Username}//
    }
  end

  flash("Domain has been set to "#{domain}"")
  #$log = Log.new("/tmp/mks_#{domain}.log") 

  flash("Site will reside in "#{rootdir}", and the index/script")
  flash(" files (index.php, .htaccess etc) will go in "#{rootdir}/public/"nn")

  # Ask the user what app she wants to install and specific questions about that app
  app = nil
  choose do |menu|
    menu.prompt = 'Choose the software for your new site: '
    Applications.each do |app|
      menu.choice app do |a|
        app = a
      end
    end
  end

  flash("You chose #{app}. #{Kernel.const_get(app).message}n") 

  # Fetch DB info if this app needs a database
  # XXX Should just have a db flag in the App's class
  db = nil
  unless app.eql? :Skeleton
    flash('This app requires a database (only MySQL supported)')
    flash('It will be created if it doesn't exist')

    db = {}
    db['host'] = 'localhost'
    loop do
      if Testing
        db['user'] = 'kiwi'
        db['pass'] = ''
        db['name'] = 'kiwi_sandbox9'
      else
        db['user'] = getVal('db_user') || ask('Enter MySQL username: ')
        db['pass'] = getVal('db_pass') || ask('Enter MySQL password: ') { |q| q.echo = '*'}
        db['name'] = ask('Enter database name (should begin w/ your 'username_')') { |dbn| dbn.validate = /^#{Username}_/}
        puts db.inspect
      end
      flash("Please double check: mysql://#{db['user']}:#{'*' * (db['pass']).size}@#{db['host']}/#{db['name']}")
      break if Testing || agree('Is this correct?')
      flash("Please re-enter database infon")
    end
  end

  # Check permissions and database login
  flash('Doing a preliminary check', :emphasize)
  flash('Checking installation environment')
  # Quit if the directory exists and the user does not want to go ahead
  unless Testing
    exit 1 if rio(rootdir).exist? &amp;&amp; !agree("Directory '#{rootdir}' exists! Continue? (Y/N)")
  end

  # Is the parent directory available and writable?
  parentdir = rio(rootdir).dirname
  unless parentdir.exist? &amp;&amp; parentdir.writable?
    flash("Directory '#{parentdir.to_s}' either doesn't exist or is not writable. Check permissions", :error)
    exit 1
  end

  unless db.nil? || false==getVal('db_confirm')
    # Check DB connection - but this does not check if user has any privileges
    flash('Checking database connection')
    begin
      dbo = Mysql.real_connect(db['host'], db['user'], db['pass'])
      flash("Successfully connected to '#{dbo.get_server_info}'")
    rescue Mysql::Error =&gt; err
      flash('Unable to connect to database server', :error)
      flash("Error returned (#{err.errno}) = '#{err.error}'", :error)
      exit 1
    ensure
      dbo.close if dbo
    end
  end

  # Begin installation
  flash("Word on the server racks is... you're good to go", :emphasize)
  klass = Kernel.const_get(app)
  webapp = klass.new(rootdir, domain, db)
  webapp.setup()
  webapp.postInstall()
end

# Script entry point: start the interactive installer.
main()</pre>
<p><a href="http://biodegradablegeek.com/wp-content/uploads/2008/10/mksite.rb" target="_new"><strong>Download</strong> the ASCII mksite.rb file here.</a></p>
]]></content:encoded>
			<wfw:commentRss>http://biodegradablegeek.com/2008/10/script-to-quickly-setup-webapp-environment-and-domain/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>

