#!/usr/bin/perl
#
# Convert a file of newline-delimited JSON records into two TSV files:
#
#   <input>.vecs.tsv  one row per record: the values of the record's
#                     'docembed' array, tab-separated.
#   <input>.meta.tsv  a "Title/URL/Lang" header row, then one row per record
#                     built from the record's 'title', 'url' and 'lang' fields.
#
# Both files receive exactly one row per accepted record, in input order, so
# data row N of the metadata file describes row N of the vectors file.
#
# Usage: perl <this script> <input-file>

use strict;
use warnings;

use JSON::XS;

# Return $text with every run of whitespace (tabs and newlines included)
# replaced by a single space, so the value cannot break the TSV row layout.
# An undefined value is treated as the empty string.
sub _collapse_whitespace {
    my ($text) = @_;
    $text //= '';
    $text =~ s/\s+/ /g;
    return $text;
}

my $in_path = $ARGV[0];
die "Usage: $0 <input-file>\n"
    unless defined $in_path && length $in_path;

my $vecs_path = "$in_path.vecs.tsv";
my $meta_path = "$in_path.meta.tsv";

# Open the input first so that a missing input file does not leave empty
# output files behind. The input is read as raw bytes because decode_json
# expects UTF-8 encoded octets, not decoded characters.
open(my $in_fh, '<', $in_path)
    or die "Cannot open '$in_path' for reading: $!\n";
open(my $vecs_fh, '>', $vecs_path)
    or die "Cannot open '$vecs_path' for writing: $!\n";
# decode_json yields character strings, so they are encoded again on output.
open(my $meta_fh, '>:encoding(UTF-8)', $meta_path)
    or die "Cannot open '$meta_path' for writing: $!\n";

print {$meta_fh} "Title\tURL\tLang\n";

LINE:
while (my $line = <$in_fh>) {
    $line =~ s/\s+$//;
    next LINE if $line eq '';

    # decode_json croaks on malformed input; trap that so a single bad line
    # is skipped instead of aborting the whole conversion.
    my $doc = eval { decode_json($line) };
    if (!defined($doc) || ref($doc) ne 'HASH') {
        warn "$in_path line $.: skipping record that is not a JSON object\n";
        next LINE;
    }

    # Write the vector. A record without a 'docembed' array still produces an
    # (empty) row so the two output files stay aligned row for row.
    my $embedding = $doc->{'docembed'};
    my @vector = ref($embedding) eq 'ARRAY' ? @{$embedding} : ();
    print {$vecs_fh} join("\t", @vector), "\n";

    # Write the metadata.
    my $title = _collapse_whitespace($doc->{'title'});
    if (length($title) < 3) {
        $title = 'Untitled';
    }
    my $url  = _collapse_whitespace($doc->{'url'});
    my $lang = _collapse_whitespace($doc->{'lang'});
    print {$meta_fh} "$title\t$url\t$lang\n";
}

close($in_fh);
# Buffered write errors only surface at close, so check both output handles.
close($vecs_fh) or die "Error closing '$vecs_path': $!\n";
close($meta_fh) or die "Error closing '$meta_path': $!\n";