This was the primary inspiration for my latest project, which features Kurt Vonnegut’s complete works, analyzed for sentiment and visualized as interactive word clouds. I developed it entirely in front-end JavaScript, and it’s currently hosted on GitHub Pages: rossgoodwin.com/vonnegut
Users can scrub through the sentiment graph of each book from start to finish and see a word cloud displayed for each position on the slider. Each word cloud represents 10 paragraphs of the book. Along with the rises and dips in the graph, sentiment values are indicated by the color of the word cloud text, which ranges from dark green (highly positive) to bright red (highly negative).
Rather than simply using word count or frequency for the size of the words, I used TF-IDF scores. (Each 10-paragraph block was treated as one document, and each book was treated as an independent set of documents.) As a result, the largest words in each word cloud are those that make their respective section unique in the context of the entire book.
The first steps in creating this project were to parse Vonnegut’s books, perform TF-IDF calculations for each word and sentiment analysis for each 10-paragraph segment, then store the resulting data in a set of JSON files. Here are the IPython notebooks where I completed these steps:
Once I had the JSON files, I used D3 to create the word clouds and Chart.js to create the line graphs. The sliders are HTML range inputs, modified with custom CSS. I wanted to create the appearance of long, semi-transparent planchettes sliding over the graphs. Getting the sliders to line up with the graphs precisely was particularly challenging, as was providing the option to click on the graphs in any location and automatically move the sliders to that location.
Here is my JavaScript code, in its current state:
/**
 * Vonnegut sentiment visualisation.
 *
 * For every book found in data/vonnegut-0.json this script renders:
 *   - a Chart.js line graph of the per-section sentiment (data/sentiment.json),
 *   - a range-input "scrubber" laid over the graph,
 *   - play / pause buttons that step through the sections every 5 seconds,
 *   - a D3 word cloud for the currently selected section, sized by the
 *     per-word score in the section's JSON file and coloured by sentiment.
 *
 * Relies on the globals `$` (jQuery), `d3` (with the d3.layout.cloud plugin)
 * and `Chart` (used through its `new Chart(ctx).Line(...)` API).
 */
(function() {
  /**
   * Linearly re-maps a number from [in_min, in_max] to [out_min, out_max].
   * NOTE(review): this extends a native prototype; it is kept because code
   * outside this file may already rely on it.
   */
  Number.prototype.map = function (in_min, in_max, out_min, out_max) {
    return (this - in_min) * (out_max - out_min) / (in_max - in_min) + out_min;
  };

  /**
   * Upper-cases the first character of every whitespace-delimited word and
   * lower-cases the rest.
   * @param {string} str
   * @returns {string}
   */
  function titleCase(str) {
    return str.replace(/\w\S*/g, function(txt) {
      return txt.charAt(0).toUpperCase() + txt.substr(1).toLowerCase();
    });
  }

  // Charts.js global config
  Chart.defaults.global.animation = false;
  Chart.defaults.global.tooltipEvents = [];
  Chart.defaults.global.scaleFontFamily = "'Cousine', monospace";
  Chart.defaults.global.showScale = false;

  // Colour ramp from most negative (red) through neutral (grey) to most
  // positive (green).
  var spectrum = ["#f22613", "#f25749", "#f28379", "#f2b0aa", "#95a5a6", "#add9c2", "#74b391", "#45996c", "#1e824c"];

  // Most recently requested section per book slug. Used to drop responses
  // that arrive after the user has already moved the scrubber elsewhere.
  var latestSection = {};

  /**
   * Picks the spectrum colour for a sentiment score.
   * The mapping assumes scores lie in [-1, 1] (presumed from the original
   * `(sentiment + 1) / 2` formula - confirm against the data pipeline).
   * The index is clamped so that a score of exactly +1 (or anything out of
   * range) still yields a valid colour instead of `undefined`.
   * @param {number} sentiment
   * @returns {string} hex colour
   */
  function sentimentColor(sentiment) {
    var ix = Math.floor(((sentiment + 1) / 2) * spectrum.length);
    if (isNaN(ix)) {
      // Missing or non-numeric score: fall back to the neutral middle colour.
      ix = Math.floor(spectrum.length / 2);
    }
    ix = Math.max(0, Math.min(spectrum.length - 1, ix));
    return spectrum[ix];
  }

  // Colour key shown above the visualisations.
  $("#key-block").append(
    '<div id=\"key-text-box\"><p class=\"text-center lead small\" style=\"margin-left: 7px;\"><<< negative | positive >>></p></div>'
  );
  spectrum.forEach(function(hex) {
    $("#key-block").append(
      '<div class=\"key-color\" style=\"background-color:'+hex+';\"></div>'
    );
  });

  /**
   * Loads the data file for `section` and redraws the word cloud (and the
   * title link) inside the container whose id is `bookslug`.
   * @param {string} bookslug - key of the book in the JSON data; also the id
   *   of the book's container element.
   * @param {number|string} section - section index; selects
   *   data/vonnegut-<section>.json.
   */
  function updateCloud(bookslug, section) {
    var requested = String(section);
    latestSection[bookslug] = requested;

    $.getJSON("data/vonnegut-"+section+".json", function(data) {
      // A newer request was issued while this one was in flight; drawing now
      // would show a cloud that does not match the scrubber position.
      if (latestSection[bookslug] !== requested) {
        return;
      }

      var book = data[bookslug];

      var layout = d3.layout.cloud()
        .size([800, 500])
        .words(book['tfidf'].map(function(d) {
          return {text: d[0], size: d[1] * 500};
        }))
        .padding(3)
        .rotate(function() { return 0; })
        .font("Cousine")
        .fontSize(function(d) { return d.size; })
        .on("end", draw);

      layout.start();

      function draw(words) {
        // The layout finishes asynchronously, so re-check for staleness.
        if (latestSection[bookslug] !== requested) {
          return;
        }

        var overallContainer = d3.select("#"+bookslug);

        // Remove the previous cloud and title link before drawing new ones.
        overallContainer.select("svg").remove();
        overallContainer.select("a").remove();

        var svgContainer = overallContainer.append("svg")
          .attr("width", layout.size()[0])
          .attr("height", layout.size()[1])
          .attr("class", "svg-cont");

        // The whole section shares one sentiment score, so the colour is
        // computed once rather than per word.
        var fill = sentimentColor(book['sentiment']);

        svgContainer.append("g")
          .attr("transform", "translate(" + layout.size()[0] / 2 + "," + layout.size()[1] / 2 + ")")
          .selectAll("text")
          .data(words)
          .enter().append("text")
          .transition().duration(500)
          .style("font-size", function(d) { return d.size + "px"; })
          .style("font-family", "Cousine")
          .style("fill", fill)
          .attr("text-anchor", "middle")
          .attr("transform", function(d) {
            return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
          })
          .text(function(d) { return d.text; });

        var title = titleCase(book['title']);

        // The title is URL-encoded; the previous URL also carried a stray
        // "%s" format placeholder in front of the keywords.
        overallContainer
          .append("a")
          .attr("href", "http://www.amazon.com/exec/obidos/external-search/?field-keywords="+encodeURIComponent(title)+"&mode=blended")
          .attr("class", "twitter-link")
          .attr("target", "_blank")
          .text(title);

        overallContainer.transition()
          .style("opacity", 1.0)
          .delay(1000)
          .duration(3000);
      }
    });
  }

  $.getJSON("data/sentiment.json", function(sent) {
    $.getJSON("data/vonnegut-0.json", function(data) {

      $("#loadinggif").fadeOut("slow");

      Object.keys(data).sort().forEach(function(slug) {
        var sectionCount = data[slug]['length'];
        var stepCount = sectionCount - 1; // highest valid section index

        $("#vis").append(
          '<div id=\"'+slug+'\" class=\"col-md-12 transparent text-center\"></div>'
        );

        $("#"+slug).append(
          '<canvas class="chart-canvas" id=\"'+slug+'-chart\" width=\"800\" height=\"150\"></canvas>'
        );

        var ctx = document.getElementById(slug+"-chart").getContext("2d");

        // One empty label per section: the graph is drawn without x labels.
        var xLabels = [];
        for (var i = 0; i < sectionCount; i++) {
          xLabels.push('');
        }

        var chartData = {
          labels: xLabels,
          datasets: [
            {
              label: titleCase(data[slug]['title']),
              fillColor: "rgba(210, 215, 211, 0.7)",
              strokeColor: "rgba(189, 195, 199, 1)",
              pointColor: "rgba(210, 215, 211, 1)",
              pointStrokeColor: "#fff",
              pointHighlightFill: "#fff",
              pointHighlightStroke: "rgba(220,220,220,1)",
              data: sent[slug]
            }
          ]
        };

        var chartOptions = {
          pointDot : false,
          pointHitDetectionRadius : 5,
          scaleShowVerticalLines: false,
          bezierCurve: false
        };

        var myNewChart = new Chart(ctx).Line(chartData, chartOptions);

        $("#"+slug).append(
          '<div class=\"scrubber\"><input id=\"'+slug+'-scrub\" type=\"range\" min=\"0\" max=\"'+stepCount+'\" value=\"0\" step=\"1\"></div>'
        );

        // Clicking the graph jumps the scrubber to the clicked position.
        $("#"+slug+"-chart").on("click", function(evt) {
          var activePoints = myNewChart.getPointsAtEvent(evt);
          // No data point within the hit radius: nothing to jump to.
          if (!activePoints || activePoints.length === 0) {
            return;
          }
          var xPos = activePoints[Math.floor(activePoints.length/2)].x;
          // 800 is the canvas width set above.
          var ix = Math.floor(xPos.map(0, 800, 0, sectionCount));
          // Keep the index inside the slider's range.
          ix = Math.max(0, Math.min(stepCount, ix));
          console.log(xPos);
          console.log(ix);
          $('#'+slug+'-scrub').val(ix);
          updateCloud(slug, ix);
        });

        // Play Button
        $('#'+slug).append(
          '<button type=\"button\" id=\"'+slug+'-btn\" class=\"btn btn-default btn-xs play-btn\" aria-label=\"Play\"><span class=\"glyphicon glyphicon-play\" aria-hidden=\"true\"></span></button>'
        );

        $('#'+slug).append(
          '<button type=\"button\" id=\"'+slug+'-btn-pause\" class=\"btn btn-default btn-xs play-btn\" aria-label=\"Pause\"><span class=\"glyphicon glyphicon-pause\" aria-hidden=\"true\"></span></button>'
        );

        // Load First Clouds
        updateCloud(slug, 0);

        // Interval handle of the running auto-play, or null when stopped.
        var play = null;

        function stopPlayback() {
          if (play !== null) {
            clearInterval(play);
            play = null;
          }
        }

        /**
         * Moves the scrubber one section forward and redraws the cloud.
         * Stops playback instead of advancing when the last section is
         * already selected.
         * @returns {boolean} true if the scrubber advanced.
         */
        function autoAdvance() {
          var scrubVal = parseInt($('#'+slug+'-scrub').val(), 10);
          console.log(sectionCount);
          if (scrubVal >= stepCount) {
            console.log("EOR");
            stopPlayback();
            return false;
          }
          console.log(scrubVal);
          var newVal = scrubVal + 1;
          $('#'+slug+'-scrub').val(newVal);
          updateCloud(slug, newVal);
          return true;
        }

        $('#'+slug+'-btn').click(function() {
          console.log('clicked ' + slug);
          // Pressing play twice must not leave a second interval running
          // that the pause button can no longer reach.
          stopPlayback();
          if (autoAdvance()) {
            play = setInterval(function() {
              autoAdvance();
            }, 5000);
          }
        });

        $('#'+slug+'-btn-pause').click(function() {
          stopPlayback();
        });

        // Dragging the scrubber redraws the cloud for the chosen section.
        $("#"+slug+"-scrub").on("input", function() {
          var sectNo = $(this).val();
          console.log(sectNo);
          updateCloud(slug, sectNo);
        });

      });

    });
  });

})();
The rest of my front-end code can be found on GitHub.
]]>With all the images hosted on Amazon S3, and the tag data hosted on Parse.com, I created a simple page where users can explore the candidates’ images by topic and by candidate. The default is all topics and all candidates, but users can narrow the selection of images displayed by making multiple selections from each field. Additionally, more images will load as you scroll down the page.
Unfortunately, the AI-enabled image tagging doesn’t always work as well as one might hope.
Here’s the page’s JavaScript code:
/**
 * Candidate image explorer.
 *
 * Lets the user filter photos by candidate and by tag. Every change of the
 * selection issues a query against the `all_photos` class of the Parse REST
 * API and renders the results in chunks of 30 as the page is scrolled.
 *
 * Relies on the global `$` (jQuery) and the jQuery Masonry plugin.
 */

// Lookup tables between candidate display names and slugs, filled once
// data/candidates.json has loaded.
var name2slug = {};
var slug2name = {};

/**
 * Removes every occurrence of each argument from the array, in place.
 * NOTE(review): extends a native prototype; kept because other scripts on
 * the page may call it.
 * @returns {Array} the same (mutated) array.
 */
Array.prototype.remove = function() {
  var what, a = arguments, L = a.length, ax;
  while (L && this.length) {
    what = a[--L];
    while ((ax = this.indexOf(what)) !== -1) {
      this.splice(ax, 1);
    }
  }
  return this;
};

/**
 * Splits the array into consecutive sub-arrays of at most `chunkSize`
 * elements.
 * NOTE(review): extends a native prototype; kept for the same reason as
 * `remove` above.
 * @param {number} chunkSize
 * @returns {Array<Array>}
 */
Array.prototype.chunk = function(chunkSize) {
  var array = this;
  return [].concat.apply([],
    array.map(function(elem, i) {
      return i % chunkSize ? [] : [array.slice(i, i + chunkSize)];
    })
  );
};

/**
 * Formats a timestamp of the form "YYYY-MM-DDTHH:MM:SSZ" for display.
 * @param {string} str
 * @returns {string} the localized date/time, or the input converted to a
 *   string (empty for null/undefined) when it is not in the expected format.
 */
function dateFromString(str) {
  var m = /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z/.exec(str);
  if (!m) {
    return str == null ? '' : String(str);
  }
  // Date.UTC takes a zero-based month, whereas the timestamp is one-based.
  var date = new Date(Date.UTC(+m[1], +m[2] - 1, +m[3], +m[4], +m[5], +m[6]));
  var options = {
    weekday: "long", year: "numeric", month: "short",
    day: "numeric", hour: "2-digit", minute: "2-digit"
  };
  return date.toLocaleTimeString("en-us", options);
}

/**
 * Runs `query` against the photo class and replaces the contents of
 * #img-container with the results. The first 30 photos are rendered
 * immediately; each time the page is scrolled to the bottom the next 30 are
 * appended.
 * @param {Object} query - `where` constraint object as built by makeQuery.
 */
function updatePhotos(query) {
  $.ajax({
    // The JSON constraint contains characters ({, ", $ ...) that must be
    // percent-encoded inside a query string.
    url: 'https://api.parse.com/1/classes/all_photos?limit=1000&where='+encodeURIComponent(JSON.stringify(query)),
    type: 'GET',
    dataType: 'json',
    success: function(response) {
      $('#img-container').empty();

      var results = (response && response['results']) || [];
      var resultChunks = results.chunk(30);
      var curChunk = 0;

      function appendPhotos(chunkNo) {
        resultChunks[chunkNo].forEach(function(obj) {
          var date = dateFromString(obj['datetime']);
          var imgUrl = "https://s3-us-west-2.amazonaws.com/electionscrape/" + obj['source'] + "/400px_" + obj['filename'];
          var fullImgUrl = "https://s3-us-west-2.amazonaws.com/electionscrape/" + obj['source'] + "/" + obj['filename'];
          // The candidate list may not have loaded yet; show the slug
          // rather than the text "undefined" in that case.
          var candidateName = slug2name[obj['candidate']] || obj['candidate'];

          // Values come from the API, so they are set through attr()/text()
          // instead of being concatenated into an HTML string.
          $('#img-container').append(
            $('<div class=\"grid-item\"></div>').append(
              $('<a></a>').attr('href', fullImgUrl).append(
                $('<img>').attr({ src: imgUrl, width: '280px' })
              ),
              $('<p></p>').text(String(candidateName)),
              $('<p></p>').text(String(date)),
              $('<p></p>').text(String(obj['source']))
            )
          );
        });
      }

      if (resultChunks.length === 0) {
        // Nothing to show; also drop the scroll handler of a previous query.
        window.onscroll = null;
        return;
      }

      appendPhotos(curChunk);

      window.onscroll = function(ev) {
        var atBottom = (window.innerHeight + window.scrollY) >= document.body.offsetHeight;
        // Stop once the last chunk has been rendered.
        if (atBottom && curChunk + 1 < resultChunks.length) {
          curChunk++;
          appendPhotos(curChunk);
        }
      };
    },
    error: function(jqXHR, textStatus, errorThrown) {
      // Previously a no-op; surface the failure instead of swallowing it.
      console.error("Photo query failed: " + textStatus, errorThrown);
    },
    beforeSend: setHeader
  });
}

/**
 * Adds the Parse credential headers to an outgoing request.
 * @param {XMLHttpRequest} xhr
 */
function setHeader(xhr) {
  xhr.setRequestHeader("X-Parse-Application-Id", "ID-GOES-HERE");
  xhr.setRequestHeader("X-Parse-REST-API-Key", "KEY-GOES-HERE");
}

/**
 * Builds the query for the current selection and refreshes the photos.
 * An empty list places no constraint on that field, so two empty lists
 * match all photos.
 * @param {string[]} candArr - selected candidate slugs.
 * @param {string[]} tagArr - selected tags; a photo matches if it carries
 *   any one of them.
 */
function makeQuery(candArr, tagArr) {
  var query = {};

  if (candArr.length > 0) {
    query['candidate'] = {"$in": candArr};
  }

  if (tagArr.length > 0) {
    // Declared locally; this used to leak as an implicit global.
    var orArr = tagArr.map(function(tag) {
      return { "tags": tag };
    });
    query['$or'] = orArr;
  }

  updatePhotos(query);
}

// Page initialisation: lay out the grid, populate both dropdowns and show
// the unfiltered photo list.
(function() {

  $('.grid').masonry({
    // options
    itemSelector: '.grid-item',
    columnWidth: 300
  });

  var selectedCandidates = [];
  var selectedTags = [];

  $.getJSON("data/candidates.json", function(data) {
    var candNames = Object.keys(data).map(function(slug) {
      var name = data[slug]['name'];
      name2slug[name] = slug;
      slug2name[slug] = name;
      return name;
    }).sort();

    candNames.forEach(function(name) {
      $('#candidate-dropdown').append(
        $('<li class=\"candidate-item\"></li>').append(
          $('<a href=\"#\"></a>').text(name)
        )
      );
    });

    $('.candidate-item').click(function() {
      var name = $(this).text();
      var slug = name2slug[name];
      if ($.inArray(slug, selectedCandidates) === -1) {
        selectedCandidates.push(slug);
        makeQuery(selectedCandidates, selectedTags);
        console.log(selectedCandidates);
        // Button that removes this candidate from the selection again.
        $('#selected-candidates').append(
          $('<button class=\"btn btn-danger btn-xs cand-select-btn\"><span class=\"glyphicon glyphicon-remove\" aria-hidden=\"true\"></span></button>')
            .append(document.createTextNode(name))
            .click(function() {
              $(this).fadeOut("fast", function() {
                selectedCandidates.remove(slug);
                makeQuery(selectedCandidates, selectedTags);
                console.log(selectedCandidates);
              });
            })
        );
      }
    });
  });

  $.getJSON("data/tags.json", function(data) {
    var tags = data["tags"].sort();

    tags.forEach(function(tag) {
      $('#tag-dropdown').append(
        $('<li class=\"tag-item\"></li>').append(
          $('<a href=\"#\"></a>').text(tag)
        )
      );
    });

    $('.tag-item').click(function() {
      var tag = $(this).text();
      if ($.inArray(tag, selectedTags) === -1) {
        selectedTags.push(tag);
        makeQuery(selectedCandidates, selectedTags);
        console.log(selectedTags);
        // Button that removes this tag from the selection again.
        $('#selected-tags').append(
          $('<button class=\"btn btn-primary btn-xs tag-select-btn\"><span class=\"glyphicon glyphicon-remove\" aria-hidden=\"true\"></span></button>')
            .append(document.createTextNode(tag))
            .click(function() {
              $(this).fadeOut("fast", function() {
                selectedTags.remove(tag);
                makeQuery(selectedCandidates, selectedTags);
                console.log(selectedTags);
              });
            })
        );
      }
    });
  });

  // Initial, unfiltered load.
  makeQuery(selectedCandidates, selectedTags);

})();
]]>