#!/usr/bin/perl
use JSON::XS;
use List::Util qw(shuffle);
use Digest::MD5 qw(md5_hex);
# Open the two report files, both named after the prefix passed as the third
# command-line argument: a tab-separated text sheet and an HTML sheet.
# Three-argument open stops characters in the prefix from being interpreted as
# an open mode, and a failed open now aborts with the OS error instead of
# letting every later print be silently discarded.
# The bareword handles OUTTXT/OUTHTML are kept because later code prints to them.
open(OUTTXT, '>', "$ARGV[2]-CONTEXTSHEET.TXT") or die "Cannot write $ARGV[2]-CONTEXTSHEET.TXT: $!\n";
binmode(OUTTXT, ":utf8");
open(OUTHTML, '>', "$ARGV[2]-CONTEXTSHEET.html") or die "Cannot write $ARGV[2]-CONTEXTSHEET.html: $!\n";
binmode(OUTHTML, ":utf8");
# HTML preamble: title line, a separator line, then the column headings.
# NOTE(review): these literals look as if their HTML tags were stripped at some
# point; they are reproduced exactly as found (including the embedded line
# breaks) -- confirm against the intended markup.
print OUTHTML "
$ARGV[2] Context Sheet\n";
print OUTHTML "\n";
print OUTHTML "Clip | Topics | EXIF Details | Web Appearances |
\n";
# Escape the HTML metacharacters in a value before it is interpolated into the
# HTML sheet. The values come from the JSON response files and from GetEXIF.
# undef is treated as the empty string.
# NOTE(review): assumes GetEXIF returns raw, unescaped text -- confirm.
my $escape_html = sub {
    my ($text) = @_;
    $text = '' unless defined($text);
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
};

# Main loop: for every JSON file matched by the glob pattern in $ARGV[0], read
# responses[0].webDetection and write one row to the text sheet (OUTTXT) and
# one row to the HTML sheet (OUTHTML).
#
# The pre-existing variables are deliberately left as package globals: GetEXIF
# is defined outside this section and may read them -- TODO confirm before
# converting them to lexicals.
#
# NOTE(review): several HTML string literals below look as if their markup was
# stripped ('' and "" literals, "{'url'}\">", and $THUMBURL never reaching the
# HTML row). They are reproduced exactly as found, including embedded line
# breaks -- confirm against the intended output.
foreach $file (glob($ARGV[0])){
    print "Processing $file...\n";

    # Slurp the file as raw bytes. Three-argument open with a lexical handle
    # keeps odd file names from being treated as open modes. A file that
    # cannot be opened leaves $json empty, so it is reported as BAD below,
    # exactly as before.
    $json = '';
    if (open(my $in, '<', $file)) {
        local $/;
        $json = <$in>;
        close($in);
        $json = '' unless defined($json);
    }

    # Anything that fails to parse, or whose top level is not a JSON object,
    # is reported and skipped rather than dying on the dereferences below.
    undef($JSONREF); eval { $JSONREF = decode_json($json); };
    if (ref($JSONREF) ne 'HASH') { print "BAD($file)\n"; next; }

    # The file name carries a six-digit frame id: "<show>-NNNNNN.json".
    ($FRAMEID) = $file=~/\-(\d\d\d\d\d\d)\./;
    $OFFSET = $FRAMEID+0; #convert to number (no zeropadding)...
    $SHOWID = $file; $SHOWID=~s/^.*\///; $SHOWID=~s/\-\d\d\d\d\d\d\.json//;
    $THUMBURL = 'http://data.gdeltproject.org/televisionexplorer/thumbnails/' . $SHOWID . '-' . sprintf("%06d", $FRAMEID) . '.jpg';

    # Topics: every web entity whose description is longer than one character.
    $TOPICS = ''; $TOPICSHTML = '';
    foreach $entry (@{$JSONREF->{'responses'}[0]->{'webDetection'}->{'webEntities'}}) {
        next unless length($entry->{'description'}) > 1;
        $TOPICS .= $entry->{'description'} . ',';
        $TOPICSHTML .= '' . $escape_html->($entry->{'description'});
    }
    chop($TOPICS); $TOPICS=~s/\s+/ /gs; # drop trailing comma, collapse whitespace (keeps tabs out of the TSV column)

    # EXIF: look up every fully and then every partially matching image and
    # keep, per field, the longest value seen. The strict "<" means the first
    # of several equally long values wins.
    $FINAL_PHOTOGRAPHER = ''; $FINAL_DATETIMEORIG = ''; $FINAL_COPYRIGHT = ''; $FINAL_DESC = '';
    foreach my $match_kind ('fullMatchingImages', 'partialMatchingImages') {
        foreach $entry (@{$JSONREF->{'responses'}[0]->{'webDetection'}->{$match_kind}}) {
            # md5_hex croaks on characters above 0xFF, which decoded JSON
            # strings can contain; skip such a URL instead of aborting the run.
            my $url_hash = eval { md5_hex($entry->{'url'}) };
            if (!defined($url_hash)) { print "BADURL($file)\n"; next; }
            ($photographer, $datetimeoriginal, $copyright, $desc) = GetEXIF($url_hash);
            if (length($FINAL_PHOTOGRAPHER) < length($photographer)) { $FINAL_PHOTOGRAPHER = $photographer; }
            if (length($FINAL_DATETIMEORIG) < length($datetimeoriginal)) { $FINAL_DATETIMEORIG = $datetimeoriginal; }
            if (length($FINAL_COPYRIGHT) < length($copyright)) { $FINAL_COPYRIGHT = $copyright; }
            if (length($FINAL_DESC) < length($desc)) { $FINAL_DESC = $desc; }
        }
    }

    print OUTTXT "$SHOWID\t$OFFSET\t$TOPICS\t$FINAL_DATETIMEORIG\t$FINAL_PHOTOGRAPHER\t$FINAL_COPYRIGHT\t$FINAL_DESC\t$THUMBURL\n";

    ###############
    #do additional work to produce the HTML version...

    #compile topics and EXIF into HTML format...
    if ($TOPICSHTML ne '') { $TOPICSHTML = ""; }
    $EXIF = '';
    foreach my $field (
        ['Date/Time',    $FINAL_DATETIMEORIG],
        ['Photographer', $FINAL_PHOTOGRAPHER],
        ['Copyright',    $FINAL_COPYRIGHT],
        ['Description',  $FINAL_DESC],
    ) {
        next if $field->[1] eq '';
        $EXIF .= "$field->[0]: " . $escape_html->($field->[1]) . "
\n";
    }

    #compile list of web links (at most the first 10 pages)...
    $WEBLINKS = ''; $wrote = 0;
    foreach $entry (@{$JSONREF->{'responses'}[0]->{'webDetection'}->{'pagesWithMatchingImages'}}) {
        if ($wrote++ < 10) { $WEBLINKS .= "{'url'}\">" . $escape_html->($entry->{'pageTitle'}) . "
\n"; }
    }
    if ($WEBLINKS ne '') { $WEBLINKS = ""; }

    #output...
    print OUTHTML " | $TOPICSHTML | $EXIF | $WEBLINKS |
";
    ###############
}
# Close the text sheet. Buffered write errors (for example a full disk) only
# surface at close time, so a failed close is reported instead of being ignored.
close(OUTTXT) or die "Error closing $ARGV[2]-CONTEXTSHEET.TXT: $!\n";
print OUTHTML "