Global Temperature Readings in MySQL form

I enjoy reading about data visualization and processing on blrpnt.com. Today there's a post about a big data release of global temperature readings, in response to the current (and totally absurd) hockey-stick debate. Anyway, I whipped up a perl script to turn the very weird text files into somewhat more useful SQL data. Here's the script, and the data is linked below.


#! /usr/bin/perl -w
    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
        if 0; #$running_under_some_shell

use strict;
use File::Find ();
use File::Basename;

# Set the variable $File::Find::dont_use_nlink if you're using AFS,
# since AFS cheats.

# for the convenience of &wanted calls, including -eval statements:
use vars qw/*name *dir *prune/;
*name   = *File::Find::name;
*dir    = *File::Find::dir;
*prune  = *File::Find::prune;

my %columns = (
	"Number" => "id",
	"Name" => "name",
	"Country" => "country",
	"Lat" => "lat",
	"Long" => "lon",
	"Height" => "height",
	"Start year" => "start_year",
	"End year" => "end_year",
	"First Good year" => "first_good_year",
	"Source ID" => "source_id",
	"Source file" => "source_file",
	"Jones data to" => "jones_data_to",
	"Normals source" => "normals_source",
	"Normals source start year" =>"normals_start",
	"Normals source end year" => "normals_end",
	"Normals source variable code" => "normals_src_variable",
	"Normals source percent availability" => "normals_src_pct_available",
	"Normals" => "normals",
	"Standard deviations source" => "std_dev_source",
	"Standard deviations source start year" => "std_dev_start",
	"Standard deviations source end year" => "std_dev_end",
	"Standard deviations" => "std_dev"
);

sub wanted;

# Traverse desired filesystems
File::Find::find({wanted => \&wanted}, '/opt/home/colin/Downloads/yipes/');
exit;


sub wanted {
    my ($dev,$ino,$mode,$nlink,$uid,$gid);

    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    -f _
	&& /\d\d\d\d\d\d/
	&& parse_file($name);
}

sub parse_file {
	my ( $name ) = @_;
	my $id = basename($name);

	my %attr;


	open(DATA, "< $name") || die("could not open $name -- $!");
	while(<DATA>) {
		chomp;

		$attr{'id'} = $id;

		if (/=/) {
			my ($key, $value) = split("=");
			$key = trim($key);

			if ( exists $columns{$key} ) {
				$attr{$columns{$key}} = trim($value);
				#print $columns{$key} . " -> " . $attr{$columns{$key}} . "\n";
			}
			else {
				#print "$key\n";
			}
		}
		else {
			#1943  -2.9  -3.8  -0.1   1.8   3.7   8.4  10.5   6.7   6.9   2.7   0.8   1.6

			my @temps = split;
			my $year = shift @temps;

			my $month = 1;
			foreach my $tmp (@temps) {
				#print "$year - $month - $tmp\n";
				my $tmpdate = "$year-$month-01";
				if ( $month < 10 ) {
					$tmpdate = "$year-0$month-01";
				}
				print "INSERT INTO readings SET station_id = '$id', theyear = $year, themonth = $month, thedate = '$tmpdate', val = $tmp;\n";
				$month++;
			}
		}
	}

		my @tmpsql;
		foreach my $key (keys %attr) {
			my $val = quotemeta($attr{$key});
			push @tmpsql, "$key='$val'";
		}

		print "INSERT INTO stations SET " . join(", ", @tmpsql) . ";\n";

	close DATA;
}

sub trim($)
{
	my $string = shift;
	$string =~ s/^\s+//;
	$string =~ s/\s+$//;
	return $string;
}

Filed under: Code, data