#!/usr/local/bin/perl -w
#
# CGI script: looks up the stops of a shipping container (CGI parameter
# 'c') on the carrier's tracking website and prints them with the
# Google Earth KML content type.
#
# NOTE(review): this file appears to have lost its line breaks and the
# XML/KML markup inside its strings and here-documents at some point
# (several print statements below emit empty strings).  The surviving
# text is kept as found; restore the markup from a pristine copy.

use CGI qw(:all);
use CGI::Carp;
use LWP::UserAgent;
use HTML::TreeBuilder;
use Spreadsheet::ParseExcel;
use GeoDB;
use strict;

my $gGeo = GeoDB->new();
$gGeo->load();

my $c = param('c');    # container
die("Missing params.\n") unless (defined $c);

print header('application/vnd.google-earth.kml+xml');

# Reuse $c instead of calling param() a second time: in list context
# param() returns every value supplied for the parameter.
my @stops = track($c);

# The document header below dereferences $stops[0], so bail out early
# when the carrier returned nothing.
die("No stops found for container $c.\n") unless @stops;

print << "%%"
Stops
$stops[0]->{'long'}
$stops[0]->{'lat'}
6371000
%%
;

my @coord;
foreach my $stop (@stops) {
    my $coord = "$stop->{'long'},$stop->{'lat'},0";
    push @coord, $coord;

    my $icon;
    # dates with parens at the end indicate estimated or planned
    # future things, and thus get a default icon instead of a green one
    if (defined $stop->{'arrive-dt'} && $stop->{'arrive-dt'} =~ /\)$/) {
        $icon = 'default';
    } else {
        # the container has already passed this location
        $icon = 'green';
    }

    my $action = '';
    $action = "($stop->{'action'})" if (defined $stop->{'action'});

    print << "%%";
$stop->{'name'}
{'arrive-dt'}, Depart: $stop->{'depart-dt'}, Via: $stop->{'vessel'} $action
]]>
Arrive: $stop->{'arrive-dt'}, Depart: $stop->{'depart-dt'}
$coord
$stop->{'lat'}
$stop->{'long'}
14000
%%
    if ($icon eq 'green') {
        print << "%%";
%%
    }
    print "\n";
}

# Show the route on the map as lines
print "Lines1";
print join("\n", @coord);
print "";
print "";
print "\n";

exit;

# track($c)
#   Looks up the scraper for container number $c and runs it.
#   Returns whatever the scraper returns (a list of stop hash refs for
#   the scrapers visible in this file).
#   Dies when no scraper is registered for the container's prefix.
sub track {
    my ($c) = @_;

    my $carrier = findCarrier($c);

    # Report the container number: $carrier is undefined on this path,
    # so interpolating it would only produce a warning and an empty name.
    die("Unknown carrier for container $c.\n") unless defined $carrier;

    return $carrier->($c);
}

# findCarrier($c)
#   Maps the first four characters of container number $c to the code
#   reference of the scraper that handles it.  Returns undef for an
#   unknown prefix.
sub findCarrier {
    # this is hard-coded for the demo. Could stay hard-coded in production
    # because we can only handle websites we've already learned how to
    # scrape anyway...
    my ($c) = @_;

    my $prefix = substr($c, 0, 4);
    my %map = (
        'NYKU' => \&trackNYKU,
        'PONU' => \&trackMaersk,
        'TEXU' => \&trackMaersk,
        'TCKU' => \&trackMaersk,
    );
    return $map{$prefix};
}

# val($sheet, $row, $col)
#   Returns the {Val} entry of the cell at ($row, $col) in the worksheet
#   hash $sheet, or undef when that cell does not exist.  The cell is
#   checked first so that empty cells are not autovivified.
sub val {
    my ($sheet, $row, $col) = @_;

    my $cell = $sheet->{Cells}[$row][$col];
    return defined $cell ? $cell->{Val} : undef;
}

# trackNYKU($c)
#   Downloads the NYK tracking spreadsheet for container $c with wget,
#   parses its first worksheet and returns one stop hash ref per data
#   row (keys: name-in, vessel, arrive-dt, depart-dt, plus whatever
#   findStop adds).
#   Dies when the download fails or the spreadsheet cannot be parsed.
sub trackNYKU {
    my ($c) = @_;
    my @res;
    my $f = "/tmp/track.$$.xls";

    # $c comes straight from the CGI request, so wget is run through the
    # list form of system(): no shell is involved and the value cannot
    # inject commands.
    my $url = 'https://www2.nykline.com/ct/containerSearchDownload.nyk'
            . '?lang=en&method=securityCheck&logid=997292'
            . "&searchNumbers=$c,";
    if (system('wget', '-q', '-O', $f, $url) != 0) {
        unlink($f);
        die("Failed to fetch NYK tracking data for $c.\n");
    }

    my $oBook = Spreadsheet::ParseExcel::Workbook->Parse($f);
    if (!defined $oBook) {
        unlink($f);
        die("Could not parse NYK tracking data for $c.\n");
    }

    # only care about the first sheet
    my ($sheet) = @{ $oBook->{Worksheet} || [] };
    if (defined $sheet
        && defined $sheet->{MinRow}
        && defined $sheet->{MaxRow}) {
        foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) {
            my $col1 = val($sheet, $row, 1);

            # skip empty rows and rows whose second column starts with
            # "Contain"
            next unless $col1 && (substr($col1, 0, 7) ne "Contain");

            my $stop = {};
            $stop->{'name-in'} = val($sheet, $row, 7);
            $stop->{'vessel'} = val($sheet, $row, 8);
            $stop->{'arrive-dt'} = nykuDate(val($sheet, $row, 5));
            # this is not right, need to correlate the NYK events someday
            # seeing as how Maersk also uses events, need to change
            # over to passing events back, then correlate them in general
            $stop->{'depart-dt'} = $stop->{'arrive-dt'};
            findStop($stop);
            push @res, $stop;
        }
    }

    unlink($f);
    return @res;
}

# NOTE(review): the body of trackMaersk is truncated in this copy of the
# file -- the text between the regex opened below and the "new;" that
# follows it is missing.  The surviving tokens are kept exactly as found;
# restore the function from a pristine copy before relying on it.
sub trackMaersk {
    my($c) = @_;
    my @res;
    open(M, "wget -q -O- 'http://www.maerskline.com/appmanager/maerskline/public?_nfpb=true&_nfls=false&_pageLabel=page_tracking3_trackSimple' |");
    my $url;
    while () {
        if (/^
new;
    # NOTE(review): everything from the top of this block down to the
    # first closing brace at column 0 is the surviving tail of
    # trackMaersk; the text that preceded "new;" is missing from this
    # copy of the file.  The tokens are kept exactly as found.
    $tree->parse_file($f);
    my @row = $tree->look_down('id', 'overview_row_1');
    my @a = $row[0]->look_down('_tag', 'a');
    $url = $a[0]->attr('href');
    #warn("Got URL $url");
    unlink($f);
    # NOTE(review): $url is scraped from a remote page and interpolated
    # into a shell command here; switch to the list form of system()
    # when this function is restored.
    system("wget -q -O $f '$url'");
    $tree = HTML::TreeBuilder->new;
    $tree->parse_file($f);
    my @t = $tree->look_down('_tag', 'table', 'class', 'lstBox');
    my @r = $t[1]->look_down('_tag', 'tr');
    shift @r;
    shift @r;
    foreach my $r (@r) {
        my @col = $r->look_down('_tag', 'td');
        my $stop = {};
        $stop->{'action'} = trim($col[0]->as_text);
        $stop->{'name-in'} = trim($col[1]->as_text);
        $stop->{'vessel'}= trim($col[3]->as_text);
        $stop->{'arrive-dt'} = mDate(trim($col[2]->as_text));
        $stop->{'depart-dt'} = $stop->{'arrive-dt'};
        findStop($stop);
        push @res, $stop;
    }
    unlink($f);
    return @res;
}

# trackApl($num)
#   Scrapes the APL routing page for container/booking number $num and
#   returns one stop hash ref per table row (keys: name-in, vessel,
#   arrive-dt, depart-dt, plus whatever findStop adds).
#   Currently disabled: the first statement dies unconditionally.
#   Once enabled, dies when a page cannot be fetched or the expected
#   table is missing.
sub trackApl {
    # Deliberately disabled; nothing below runs until this is removed.
    die("Not recently tested.");

    my ($num) = @_;

    # fetch this first to set the session cookie; the session somehow
    # saves the desired search string. Ugh.
    my $url = "http://homeport.apl.com/gentrack/trackingMain.do?trckSingleInput=$num";
    my $ua = LWP::UserAgent->new;
    $ua->cookie_jar( {} );
    my $res = $ua->request(HTTP::Request->new(GET => $url));
    die("Failed to fetch $url.\n") unless $res->is_success;

    $url = 'http://homeport.apl.com/gentrack/blRoutingFrame.do';
    $res = $ua->request(HTTP::Request->new(GET => $url));
    die("Failed to fetch $url.\n") unless $res->is_success;

    my $tree = HTML::TreeBuilder->new;
    $tree->parse($res->decoded_content());
    # parse() only feeds a chunk to the parser; eof() tells it the
    # document is complete so the tree is finalised before it is searched.
    $tree->eof();

    my @tbls = $tree->look_down('_tag', 'table');
    # The code reads the fourth table, so make sure it actually exists
    # instead of only checking that some table was found.
    die("Could not find table.\n") unless @tbls > 3;
    my $tbl = $tbls[3];

    my @rows = $tbl->look_down('_tag', 'tr');
    # ditch header row
    shift @rows;

    my @res;
    foreach my $row (@rows) {
        my @cols = $row->look_down('_tag', 'td');

        # columns 1..4 are read below; skip rows that are too short
        next unless @cols >= 5;

        my $stop = {};
        $stop->{'name-in'} = trim($cols[1]->as_text);
        $stop->{'vessel'} = trim($cols[2]->as_text);
        $stop->{'arrive-dt'} = aplDate(trim($cols[3]->as_text));
        $stop->{'depart-dt'} = aplDate(trim($cols[4]->as_text));
        findStop($stop);
        push @res, $stop;
    }

    return @res;
}

# mDate($dt)
#   Returns the date string $dt unchanged.
# no translation needed for now
sub mDate {
    my ($dt) = @_;
    return $dt;
}

# nykuDate($dt)
#   Returns the date string $dt unchanged.
# no translation needed for now
sub nykuDate {
    my ($dt) = @_;
    return $dt;
}

# aplDate($dt)
#   Translates the abbreviations APL uses into human readable strings.
#   A trailing one-letter flag is handled as follows:
#     A = actual, L = containers loaded here, D = containers unloaded
#         here: the flag is simply removed
#     E: removed and " (estimated)" appended
#     P: removed and " (planned)" appended
#   Returns the translated string; input without a trailing flag is
#   returned unchanged.
sub aplDate {
    my ($dt) = @_;

    # Every pattern is anchored to the end of the string so that a flag
    # letter is never confused with text inside the date itself (for
    # example the "P" of "PM").
    if ($dt =~ s/\s+[ALD]$//) {
        return $dt;
    }
    if ($dt =~ s/\s+E$//) {
        return $dt . " (estimated)";
    }
    if ($dt =~ s/\s+P$//) {
        return $dt . " (planned)";
    }
    return $dt;
}

# trim($text)
#   Returns a copy of $text with leading whitespace and trailing
#   non-word characters (which includes whitespace) removed.  An
#   undefined argument yields the empty string.  The caller's variable
#   is left untouched.
sub trim {
    my ($text) = @_;

    return '' unless defined $text;
    $text =~ s/^\s+//;
    $text =~ s/\W+$//;
    return $text;
}

# Longitude handed to the next stop whose location is unknown; findStop
# advances it by 10 each time.  It is initialised in a BEGIN block
# because this statement sits after the main script's "exit" and is
# therefore never executed at run time.
my $gUnk;
BEGIN { $gUnk = 0; }

# findStop($stop)
#   Looks up $stop->{'name-in'} with $gGeo->find and stores the result
#   in $stop->{'name'}, $stop->{'lat'} and $stop->{'long'}.
#   When nothing is found, a warning is emitted, the stop keeps its
#   input name, and it gets latitude 0 and the next $gUnk longitude.
#   Returns $stop.
sub findStop {
    my ($stop) = @_;

    my ($name, $lat, $long) = $gGeo->find($stop->{'name-in'});

    if (defined $name) {
        $stop->{'name'} = $name;
        $stop->{'lat'} = $lat;
        $stop->{'long'} = $long;
        return $stop;
    }

    my $name_in = defined $stop->{'name-in'} ? $stop->{'name-in'} : '';

    # Fall back to the name we were given so the stop is still labelled.
    $stop->{'name'} = $name_in;
    $stop->{'lat'} = 0;
    $stop->{'long'} = $gUnk;
    $gUnk += 10;
    warn("Could not find lat/long for '$name_in'.\n");

    return $stop;
}