Global Temperature Readings in MySQL form
I enjoy reading about data visualization and processing on blrpnt.com. Today there's a post about a big data release of global temperature readings, in response to the current (and totally absurd) hockey-stick debate. Anyway, I whipped up a perl script to turn the very weird text files into somewhat more useful SQL data. Here's the script, and the data is linked below.
#! /usr/bin/perl -w
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; #$running_under_some_shell
use strict;
use File::Find ();
use File::Basename;
# Set the variable $File::Find::dont_use_nlink if you're using AFS,
# since AFS cheats.
# for the convenience of &wanted calls, including -eval statements:
use vars qw/*name *dir *prune/;
*name = *File::Find::name;
*dir = *File::Find::dir;
*prune = *File::Find::prune;
my %columns = (
"Number" => "id",
"Name" => "name",
"Country" => "country",
"Lat" => "lat",
"Long" => "lon",
"Height" => "height",
"Start year" => "start_year",
"End year" => "end_year",
"First Good year" => "first_good_year",
"Source ID" => "source_id",
"Source file" => "source_file",
"Jones data to" => "jones_data_to",
"Normals source" => "normals_source",
"Normals source start year" =>"normals_start",
"Normals source end year" => "normals_end",
"Normals source variable code" => "normals_src_variable",
"Normals source percent availability" => "normals_src_pct_available",
"Normals" => "normals",
"Standard deviations source" => "std_dev_source",
"Standard deviations source start year" => "std_dev_start",
"Standard deviations source end year" => "std_dev_end",
"Standard deviations" => "std_dev"
);
sub wanted;
# Traverse desired filesystems
File::Find::find({wanted => \&wanted}, '/opt/home/colin/Downloads/yipes/');
exit;
sub wanted {
my ($dev,$ino,$mode,$nlink,$uid,$gid);
(($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
-f _
&& /\d\d\d\d\d\d/
&& parse_file($name);
}
sub parse_file {
my ( $name ) = @_;
my $id = basename($name);
my %attr;
open(DATA, "< $name") || die("could not open $name -- $!");
while(<DATA>) {
chomp;
$attr{'id'} = $id;
if (/=/) {
my ($key, $value) = split("=");
$key = trim($key);
if ( exists $columns{$key} ) {
$attr{$columns{$key}} = trim($value);
#print $columns{$key} . " -> " . $attr{$columns{$key}} . "\n";
}
else {
#print "$key\n";
}
}
else {
#1943 -2.9 -3.8 -0.1 1.8 3.7 8.4 10.5 6.7 6.9 2.7 0.8 1.6
my @temps = split;
my $year = shift @temps;
my $month = 1;
foreach my $tmp (@temps) {
#print "$year - $month - $tmp\n";
my $tmpdate = "$year-$month-01";
if ( $month < 10 ) {
$tmpdate = "$year-0$month-01";
}
print "INSERT INTO readings SET station_id = '$id', theyear = $year, themonth = $month, thedate = '$tmpdate', val = $tmp;\n";
$month++;
}
}
}
my @tmpsql;
foreach my $key (keys %attr) {
my $val = quotemeta($attr{$key});
push @tmpsql, "$key='$val'";
}
print "INSERT INTO stations SET " . join(", ", @tmpsql) . ";\n";
close DATA;
}
sub trim($)
{
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}