#!/usr/bin/perl
#
# Builds a "context sheet" from a set of web-detection JSON files.
#
# Usage: <script> 'JSON_GLOB' EXIF_DIR OUTPUT_PREFIX
#
#   JSON_GLOB      glob pattern of input files; the frame number is taken
#                  from a "-NNNNNN." (six digit) component of each file name.
#   EXIF_DIR       directory holding "<md5 of image URL>.txt" files made of
#                  "Tag : value" lines (see GetEXIF below).
#   OUTPUT_PREFIX  prefix of the two files written:
#                    OUTPUT_PREFIX-CONTEXTSHEET.TXT   tab-separated, one row per input file
#                    OUTPUT_PREFIX-CONTEXTSHEET.html  the same data as an HTML table
#
# The JSON keys read here (responses[0].webDetection.webEntities /
# fullMatchingImages / partialMatchingImages / pagesWithMatchingImages)
# look like Google Cloud Vision web-detection output -- TODO confirm.
#
# NOTE(review): the copy of this script that was reviewed had lost the HTML
# tags inside its string literals (and the "<EXIF>" readline in GetEXIF).
# The markup emitted below is a minimal reconstruction inferred from what
# was left (a four column table, bullet lists, line breaks); compare it
# against the original script before relying on the exact HTML layout.

use strict;
use warnings;

use JSON::XS;
use List::Util qw(shuffle);
use Digest::MD5 qw(md5_hex);

# At most this many "pages with matching images" links are listed per clip.
my $MAX_WEB_LINKS = 10;
my $THUMB_BASE    = 'http://data.gdeltproject.org/televisionexplorer/thumbnails/';

die "Usage: $0 'JSON_GLOB' EXIF_DIR OUTPUT_PREFIX\n" if @ARGV < 3;
my ($json_glob, $exif_dir, $prefix) = @ARGV;    # GetEXIF reads EXIF_DIR from $ARGV[1] itself

my $txt_path  = "$prefix-CONTEXTSHEET.TXT";
my $html_path = "$prefix-CONTEXTSHEET.html";
open(my $out_txt, '>:encoding(UTF-8)', $txt_path)
    or die "Cannot write $txt_path: $!\n";
open(my $out_html, '>:encoding(UTF-8)', $html_path)
    or die "Cannot write $html_path: $!\n";

my $page_title = _escape_html($prefix);
print {$out_html} "<html><head><meta charset=\"utf-8\"><title>$page_title Context Sheet</title></head><body>\n";
print {$out_html} "<table border=\"1\">\n";
print {$out_html} "<tr><th>Clip</th><th>Topics</th><th>EXIF Details</th><th>Web Appearances</th></tr>\n";

FILE:
foreach my $file (glob($json_glob)) {
    print "Processing $file...\n";

    # Unreadable, unparsable or non-object JSON is reported and skipped.
    my $doc = _load_json($file);
    if (ref($doc) ne 'HASH') {
        print "BAD($file)\n";
        next FILE;
    }

    # Frame number from the file name; "+ 0" drops the zero padding.
    # A name without a frame component yields offset 0, as before.
    my ($frame_id) = $file =~ /\-(\d\d\d\d\d\d)\./;
    my $offset = defined $frame_id ? $frame_id + 0 : 0;

    # Show id = file name without directory and without "-NNNNNN.json".
    (my $show_id = $file) =~ s/^.*\///;
    $show_id =~ s/\-\d\d\d\d\d\d\.json//;

    my $thumb_url = $THUMB_BASE . $show_id . '-' . sprintf("%06d", $offset) . '.jpg';

    # Tolerate a missing or oddly shaped "responses" element: the row is
    # still written, just with empty detection fields.
    my $web = eval { $doc->{'responses'}[0]{'webDetection'} };
    $web = {} if ref($web) ne 'HASH';

    # Topics: descriptions longer than one character, comma separated.
    my @topics =
        grep { defined($_) && !ref($_) && length($_) > 1 }
        map  { $_->{'description'} } _entries($web, 'webEntities');
    my $topics = join(',', @topics);
    $topics =~ s/\s+/ /g;

    # For each EXIF field keep the longest value seen across all full and
    # partial matching images (the first one wins on ties).
    my @best = ('', '', '', '');
    foreach my $image (_entries($web, 'fullMatchingImages'),
                       _entries($web, 'partialMatchingImages')) {
        my $url = $image->{'url'};
        next if !defined($url) || ref($url);
        my @found = GetEXIF(_exif_key($url));
        foreach my $i (0 .. $#best) {
            my $candidate = $found[$i];
            next if !defined $candidate;
            $best[$i] = $candidate if length($best[$i]) < length($candidate);
        }
    }
    my ($photographer, $datetime_orig, $copyright, $desc) = @best;

    print {$out_txt} join("\t", $show_id, $offset, $topics, $datetime_orig,
                          $photographer, $copyright, $desc, $thumb_url), "\n";

    ###############
    # HTML version of the same row. Every value originates from web pages
    # or image metadata, so it is escaped before being placed in markup.

    my $topics_html = join('', map { '<li>' . _escape_html($_) . '</li>' } @topics);
    $topics_html = "<ul>$topics_html</ul>" if $topics_html ne '';

    my $exif_html = '';
    foreach my $pair (['Date/Time',    $datetime_orig],
                      ['Photographer', $photographer],
                      ['Copyright',    $copyright],
                      ['Description',  $desc]) {
        my ($label, $value) = @{$pair};
        next if $value eq '';
        $exif_html .= "$label: " . _escape_html($value) . "<br>\n";
    }

    # Only the first $MAX_WEB_LINKS entries are considered.
    my @pages = _entries($web, 'pagesWithMatchingImages');
    splice(@pages, $MAX_WEB_LINKS) if @pages > $MAX_WEB_LINKS;

    my $links_html = '';
    foreach my $page (@pages) {
        my $url = $page->{'url'};
        next if !defined($url) || ref($url);

        my $title = $page->{'pageTitle'};
        $title = '' if !defined($title) || ref($title);
        # The Vision API reference says pageTitle may contain HTML markup;
        # drop any tags so the title cannot inject markup into the sheet.
        $title =~ s/<[^>]*>//g;
        $title = $url if $title !~ /\S/;    # never emit a link with no text

        $links_html .= '<li><a href="' . _escape_html($url) . '">'
                     . _escape_html($title) . "</a></li>\n";
    }
    $links_html = "<ul>$links_html</ul>" if $links_html ne '';

    # NOTE(review): the content of the "Clip" cell was not recoverable; the
    # thumbnail image is used here because it is the only clip-related URL
    # the script computes -- confirm.
    print {$out_html} '<tr><td><img src="' . _escape_html($thumb_url) . '"></td>'
                    . "<td>$topics_html</td><td>$exif_html</td><td>$links_html</td></tr>\n";
    ###############
}

close($out_txt) or die "Error closing $txt_path: $!\n";

print {$out_html} "</table></body></html>\n";
close($out_html) or die "Error closing $html_path: $!\n";

############################################################################
# GetEXIF($key)
#
# Reads "$ARGV[1]/$key.txt" and returns the list
#   ($photographer, $datetimeoriginal, $copyright, $desc)
# taken from lines of the form "Artist : ...", "Date/Time Original : ...",
# "Copyright : ..." and "Image Description : ..." / "Description : ...".
# A "Description" line replaces the description only when it is longer than
# the one already found. Fields that are not present are returned as undef;
# a missing or unreadable file yields four undefs.
sub GetEXIF {
    my ($key) = @_;

    my ($photographer, $datetimeoriginal, $copyright, $desc);

    my $path = "$ARGV[1]/$key.txt";
    open(my $fh, '<:encoding(UTF-8)', $path)
        or return ($photographer, $datetimeoriginal, $copyright, $desc);

    while (my $line = <$fh>) {
        if ($line =~ /^Artist\s+:\s*(.*)/) {
            $photographer = $1;
        }
        elsif ($line =~ /^Date\/Time Original\s+:\s*(.*)/) {
            $datetimeoriginal = $1;
        }
        elsif ($line =~ /^Copyright\s+:\s*(.*)/) {
            $copyright = $1;
        }
        elsif ($line =~ /^Image Description\s+:\s*(.*)/) {
            $desc = $1;
        }
        elsif ($line =~ /^Description\s+:\s*(.*)/) {
            my $found       = $1;
            my $current_len = defined $desc ? length($desc) : 0;
            $desc = $found if length($found) > $current_len;
        }
    }
    close($fh);

    return ($photographer, $datetimeoriginal, $copyright, $desc);
}

############################################################################
# Private helpers.

# Reads $path and returns the decoded JSON value, or undef when the file
# cannot be read or does not parse.
sub _load_json {
    my ($path) = @_;

    # decode_json expects UTF-8 encoded bytes, so read without any layer.
    open(my $fh, '<:raw', $path) or return;
    my $raw = do { local $/; <$fh> };
    close($fh);
    return if !defined $raw;

    my $doc = eval { decode_json($raw) };
    return $doc;
}

# Returns the hash-ref elements of the array stored under $key in $web, or
# an empty list when the key is absent or is not an array.
sub _entries {
    my ($web, $key) = @_;

    my $list = $web->{$key};
    return if ref($list) ne 'ARRAY';
    return grep { ref($_) eq 'HASH' } @{$list};
}

# Returns the MD5 hex digest used as the EXIF file name for an image URL.
# md5_hex croaks on strings containing characters above U+00FF; in that
# case the UTF-8 encoding of the URL is hashed instead of aborting the run.
# NOTE(review): whether EXIF files for such URLs exist under the UTF-8
# digest depends on the tool that wrote them -- confirm.
sub _exif_key {
    my ($url) = @_;

    my $key = eval { md5_hex($url) };
    return $key if defined $key;

    my $bytes = $url;
    utf8::encode($bytes);
    return md5_hex($bytes);
}

# Escapes the characters that are special in HTML text and attribute
# values; undef becomes the empty string.
sub _escape_html {
    my ($text) = @_;

    return '' if !defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}
############################################################################