Skip to Navigation

Muffinlabs!

Global Temperature Readings in MySQL form

I enjoy reading about data visualization and processing on blrpnt.com. Today there's a post about a big data release of global temperature readings, in response to the current (and totally absurd) hockey-stick debate. Anyway, I whipped up a perl script to turn the very weird text files into somewhat more useful SQL data. Here's the script, and the data is linked below.

#! /usr/bin/perl -w
    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
        if 0; #$running_under_some_shell

use strict;
use File::Find ();
use File::Basename;

# Set the variable $File::Find::dont_use_nlink if you're using AFS,
# since AFS cheats.

# for the convenience of &wanted calls, including -eval statements:
use vars qw/*name *dir *prune/;
*name   = *File::Find::name;
*dir    = *File::Find::dir;
*prune  = *File::Find::prune;

my %columns = (
        "Number" => "id",
        "Name" => "name",
        "Country" => "country",
        "Lat" => "lat",
        "Long" => "lon",
        "Height" => "height",
        "Start year" => "start_year",
        "End year" => "end_year",
        "First Good year" => "first_good_year",
        "Source ID" => "source_id",
        "Source file" => "source_file",
        "Jones data to" => "jones_data_to",
        "Normals source" => "normals_source",
        "Normals source start year" =>"normals_start",
        "Normals source end year" => "normals_end",
        "Normals source variable code" => "normals_src_variable",
        "Normals source percent availability" => "normals_src_pct_available",
        "Normals" => "normals",
        "Standard deviations source" => "std_dev_source",
        "Standard deviations source start year" => "std_dev_start",
        "Standard deviations source end year" => "std_dev_end",
        "Standard deviations" => "std_dev"
);




sub wanted;

# Traverse desired filesystems
File::Find::find({wanted => \&wanted}, '/opt/home/colin/Downloads/yipes/');
exit;


sub wanted {
    my ($dev,$ino,$mode,$nlink,$uid,$gid);

    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    -f _
        && /\d\d\d\d\d\d/
        && parse_file($name);
}

sub parse_file {
        my ( $name ) = @_;
        my $id = basename($name);

        my %attr;


        open(DATA, "< $name") || die("could not open $name -- $!");
        while(<DATA>) {
                chomp;

                $attr{'id'} = $id;

                if (/=/) {
                        my ($key, $value) = split("=");
                        $key = trim($key);

                        if ( exists $columns{$key} ) {
                                $attr{$columns{$key}} = trim($value);
                                #print $columns{$key} . " -> " . $attr{$columns{$key}} . "\n";
                        }
                        else {
                                #print "$key\n";
                        }
                }
                else {
                        #1943  -2.9  -3.8  -0.1   1.8   3.7   8.4  10.5   6.7   6.9   2.7   0.8   1.6

                        my @temps = split;
                        my $year = shift @temps;

                        my $month = 1;
                        foreach my $tmp (@temps) {
                                #print "$year - $month - $tmp\n";
                                my $tmpdate = "$year-$month-01";
                                if ( $month < 10 ) {
                                        $tmpdate = "$year-0$month-01";
                                }
                                print "INSERT INTO readings SET station_id = '$id', theyear = $year, themonth = $month, thedate = '$tmpdate', val = $tmp;\n";
                                $month++;
                        }
                }
        }

                my @tmpsql;
                foreach my $key (keys %attr) {
                        my $val = quotemeta($attr{$key});
                        push @tmpsql, "$key='$val'";
                }

                print "INSERT INTO stations SET " . join(", ", @tmpsql) . ";\n";

        close DATA;
}

sub trim($)
{
        my $string = shift;
        $string =~ s/^\s+//;
        $string =~ s/\s+$//;
        return $string;
}

AttachmentSize
temp-readings-120909.sql_.gz108.26 KB