Global Temperature Readings in MySQL form

I enjoy reading about data visualization and processing on blrpnt.com. Today there's a post about a big data release of global temperature readings, in response to the current (and totally absurd) hockey-stick debate. Anyway, I whipped up a perl script to turn the very weird text files into somewhat more useful SQL data. Here's the script, and the data is linked below.

  1
  2#! /usr/bin/perl -w
  3    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
  4        if 0; #$running_under_some_shell
  5
  6use strict;
  7use File::Find ();
  8use File::Basename;
  9
 10# Set the variable $File::Find::dont_use_nlink if you're using AFS,
 11# since AFS cheats.
 12
 13# for the convenience of &wanted calls, including -eval statements:
 14use vars qw/*name *dir *prune/;
 15*name   = *File::Find::name;
 16*dir    = *File::Find::dir;
 17*prune  = *File::Find::prune;
 18
 19my %columns = (
 20        "Number" => "id",
 21        "Name" => "name",
 22        "Country" => "country",
 23        "Lat" => "lat",
 24        "Long" => "lon",
 25        "Height" => "height",
 26        "Start year" => "start_year",
 27        "End year" => "end_year",
 28        "First Good year" => "first_good_year",
 29        "Source ID" => "source_id",
 30        "Source file" => "source_file",
 31        "Jones data to" => "jones_data_to",
 32        "Normals source" => "normals_source",
 33        "Normals source start year" =>"normals_start",
 34        "Normals source end year" => "normals_end",
 35        "Normals source variable code" => "normals_src_variable",
 36        "Normals source percent availability" => "normals_src_pct_available",
 37        "Normals" => "normals",
 38        "Standard deviations source" => "std_dev_source",
 39        "Standard deviations source start year" => "std_dev_start",
 40        "Standard deviations source end year" => "std_dev_end",
 41        "Standard deviations" => "std_dev"
 42);
 43
 44sub wanted;
 45
 46# Traverse desired filesystems
 47File::Find::find({wanted => \&wanted}, '/opt/home/colin/Downloads/yipes/');
 48exit;
 49
 50
 51sub wanted {
 52    my ($dev,$ino,$mode,$nlink,$uid,$gid);
 53
 54    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
 55    -f _
 56        && /\d\d\d\d\d\d/
 57        && parse_file($name);
 58}
 59
 60sub parse_file {
 61        my ( $name ) = @_;
 62        my $id = basename($name);
 63
 64        my %attr;
 65
 66
 67        open(DATA, "< $name") || die("could not open $name -- $!");
 68        while(<DATA>) {
 69                chomp;
 70
 71                $attr{'id'} = $id;
 72
 73                if (/=/) {
 74                        my ($key, $value) = split("=");
 75                        $key = trim($key);
 76
 77                        if ( exists $columns{$key} ) {
 78                                $attr{$columns{$key}} = trim($value);
 79                                #print $columns{$key} . " -> " . $attr{$columns{$key}} . "\n";
 80                        }
 81                        else {
 82                                #print "$key\n";
 83                        }
 84                }
 85                else {
 86                        #1943  -2.9  -3.8  -0.1   1.8   3.7   8.4  10.5   6.7   6.9   2.7   0.8   1.6
 87
 88                        my @temps = split;
 89                        my $year = shift @temps;
 90
 91                        my $month = 1;
 92                        foreach my $tmp (@temps) {
 93                                #print "$year - $month - $tmp\n";
 94                                my $tmpdate = "$year-$month-01";
 95                                if ( $month < 10 ) {
 96                                        $tmpdate = "$year-0$month-01";
 97                                }
 98                                print "INSERT INTO readings SET station_id = '$id', theyear = $year, themonth = $month, thedate = '$tmpdate', val = $tmp;\n";
 99                                $month++;
100                        }
101                }
102        }
103
104                my @tmpsql;
105                foreach my $key (keys %attr) {
106                        my $val = quotemeta($attr{$key});
107                        push @tmpsql, "$key='$val'";
108                }
109
110                print "INSERT INTO stations SET " . join(", ", @tmpsql) . ";\n";
111
112        close DATA;
113}
114
115sub trim($)
116{
117        my $string = shift;
118        $string =~ s/^\s+//;
119        $string =~ s/\s+$//;
120        return $string;
121}
122
Filed under: Code, data