little green footballs

Tech Note: The LGF Pages Bookmarklet Source Code

Sun, Jan 3, 2016 at 6:13:05 pm

Image via Shutterstock

It's been a while since I posted the source code for our LGF Pages bookmarklet, and it's actually undergone some major changes recently, so let's remedy that omission right now, shall we?

The biggest change: all of the code has been rewritten to use native Javascript, doing away with the need to load the jQuery library. This means the bookmarklet will open more quickly, since the new version only needs to load one external script instead of two.

I'll start by posting the entire script, then I'll take it apart section by section to explain how it all works.

/*
LGF Pages Posting Bookmarklet
By Charles F. Johnson
Copyright 2016 Little Green Footballs
All Rights Reserved.

File: lgf-postpage-loader.js
Last revision: 1/1/16 at 10:28 AM
*/
(function() {
	var url = location.href,
		d = document,
		ww = 720,
		wh = 250,
		version = '1.7.0',
		input,
		key,
		value,
		obj,
		theScript,
		noFunctions = RegExp([
			'(\.(?:pdf|jpe?g|gif|png|mp[34])(?:[\?#]|$))',
			'^https?://twitter\.com'
		].join('|'), 'i'),
		popup = window.open(
			'',
			'',
			'scrollbars=1,resizable=1,width=' + ww + ',height=' + wh
		);

	popup.document.write(
		'<!DOCTYPE html>' +
		'<html>' +
		'<head>' +
		'<meta charset="UTF-8">' +
		'<title>LGF Pages</title>' +
		'<meta name="viewport" content="width=device-width, initial-scale=1">' +
		'<style type="text/css">' +
		'*{' +
			'font-family:"Trebuchet MS",sans-serif;' +
			'font-weight:normal;' +
			'margin:0;' +
			'padding:0;' +
			'position:relative;' +
		'}' +
		'window{' +
			'width:' + ww + 'px;' +
			'height:' + wh + 'px;' +
		'}' +
		'body{' +
			'background:#D8E9D8;' +
			'background:linear-gradient(top, #D8E9D8, white) fixed;' +
		'}' +
		'img{' +
			'top:2px;' +
			'width:32px;' +
			'height:32px;' +
		'}' +
		'div{' +
			'width:560px;' +
			'background-color:white;' +
			'border:2px solid #B0B0B0;' +
			'border-radius:12px;' +
			'text-align:center;' +
			'margin:0 auto;' +
			'z-index:1;' +
			'top:-6px;' +
		'}' +
		'h1{' +
			'font-size:42px;' +
			'line-height:42px;' +
			'color:#548563;' +
			'letter-spacing:-0.09em;' +
			'text-align:center;' +
			'z-index:2;' +
		'}' +
		'h3{' +
			'font-size:220%;' +
			'margin:1em 0;' +
			'color:#548563;' +
		'}' +
		'</style>' +
		'</head>' +
		'<body>' +
		'<h1>lgf pages</h1>' +
		'<div>' +
		'<h3><img src="http://littlegreenfootballs.com/bigroller.gif"> Loading...</h3>' +
		'<form action="http://littlegreenfootballs.com/weblog/lgf-postpage.php" method="POST" accept-charset="UTF-8">' +
		'<input type="hidden" name="v" value="' + version + '">' +
		'</form>' +
		'</div>' +
		'</body>' +
		'</html>'
	);
	if (url.match(noFunctions)) {
		cantInject();
	}
	theScript = d.createElement('script');
	theScript.src = '//littlegreenfootballs.com/lgfjs/lgf-postpage-functions.min.js?' + Math.floor(new Date().getTime() / 1000);
	theScript.onload = function() {
		theScript.parentNode.removeChild(theScript);
		doForm($LGF_Page.getQuery(version));
	};
	theScript.onerror = function() {
		cantInject();
	};
	d.getElementsByTagName('head')[0].appendChild(theScript);
	function doForm(obj) {
		var popForm = popup.document.forms[0];
		for (key in obj) {
			input = popup.document.createElement('input');
			input.type = 'hidden';
			input.name = key;
			input.value = obj[key];
			popForm.appendChild(input);
		}
		popForm.submit();
	}
	function cantInject() {
		obj = {
			u: url,
			x: 1
		};
		if (typeof d.title != 'undefined') {
			obj.t = d.title;
			obj.s = d.selection ? d.selection.createRange().text : d.getSelection();
		}
		doForm(obj);
	}
})();

OK. Now that I've inflicted all that Javascript upon you, dear reader, let's dig in and explain what's going on.

Starting at the top, this entire bookmarklet is wrapped in something known as a self-executing anonymous function (SEAF), so that all the functions and variables contained within it remain private and are only available in the scope of the self-executing function. This is a very important concept for bookmarklets; it ensures that variables can't conflict with any global variables on the web page in which you invoke the bookmarklet. Because that would be a BAD THING.

It's "self-executing" because it immediately executes as soon as it's defined, and "anonymous" because it has no function name. A SEAF typically looks like this:

(function(){
 // the code
})();

And if you look at the beginning and ending lines of our script, you'll see that all of the code is contained within a SEAF.

The first order of business is defining variables that will be needed within the script; I like to do this at the beginning of a block of code so all the variable definitions are in one place. That's what lines 11 through 29 do.

But there are a couple of tricky things going on in there. For example:

noFunctions = RegExp([
	'(\.(?:pdf|jpe?g|gif|png|mp[34])(?:[\?#]|$))',
	'^https?://twitter\.com'
].join('|'), 'i')

The "noFunctions" variable is actually a regular expression that checks the URL of the page in which the bookmarklet is invoked, to see if it needs special attention. PDFs, images, and videos can open in a browser window, but they aren't actual web pages with a Document Object Model, so we need to handle them differently.

This regular expression also checks to see if the current page is on twitter​.com, because Twitter uses special HTTP headers that prevent bookmarklets from injecting script tags -- a feature of the LGF Pages bookmarklet we're about to describe.

I wrote this as an array of strings that are joined together with the regular expression OR operator, so that I can easily add any websites to the list that need the same kind of special treatment as twitter​.com.

The "popup" variable is important to the rest of the code; this variable declaration opens the LGF Pages popup window and returns a reference to the new window.

popup = window.open(
	'',
	'',
	'scrollbars=1,resizable=1,width=' + ww + ',height=' + wh
)

That brings us to lines 31 through 94. This section uses Javascript's document.write() function to create a web page inside our popup window, that serves as a placeholder while the rest of the script executes, and also contains an HTML form that will be used to send the rest of the page data to the LGF Pages posting script. I broke this section up into a series of strings to make the structure more obvious (and easier to modify as needed).

Lines 95 through 97 use that regular expression we defined above to check the current web page's URL. If it matches one of the extensions or full URLs in the regex, we jump to the cantInject() function (which I'll describe soon).

if (url.match(noFunctions)) {
	cantInject();
}

If it's OK to inject a script tag into the current page's DOM, we then proceed to do exactly that. Lines 98 through 107 create a script element and set its src attribute to point to the external script containing all the special functions we use to examine a web page and discover any embedded video, audio and PDF files that will be automatically included in the LGF Page posting window.

theScript = d.createElement('script');
theScript.src = '//littlegreenfootballs.com/lgfjs/lgf-postpage-functions.min.js?' + (new Date().getTime());
theScript.onload = function() {
	theScript.parentNode.removeChild(theScript);
	doForm($LGF_Page.getQuery(version));
};
theScript.onerror = function() {
	cantInject();
};
d.getElementsByTagName('head')[0].appendChild(theScript);

We want to prevent the user's browser from caching the script so that it's loaded every time the bookmarklet opens, so we append the current time in milliseconds to the script's address as a query string (following a question mark). This lets us modify the external functions as needed, and know they'll always be reloaded the next time the bookmarklet is invoked.

We use the script element's onload callback to invoke the external script's code when it's finished loading and initializing; the $LGF_Page.getQuery() function returns an object which we pass to the doForm() function.

If there's some kind of error that causes the load to fail, the onerror callback jumps to the cantInject() function.

Now we get to the real meat of the enchilada, the function that actually sets up the form in our popup window, fills in the various fields with data, and submits it to the LGF Pages script: doForm(). This function is passed an object, and uses a for/in loop to create hidden inputs that use the object keys as the names of the input elements and the object values as the input elements' values, appends each new input element to the form, then submits the form. This causes the popup window to come up with the LGF Pages posting form you know and love, with all the fields filled in.

function doForm(obj) {
	var popForm = popup.document.forms[0];
	for (key in obj) {
		input = popup.document.createElement('input');
		input.type = 'hidden';
		input.name = key;
		input.value = obj[key];
		popForm.appendChild(input);
	}
	popForm.submit();
}

Previous versions of the LGF Pages bookmarklet sent all this data to the posting script by using a URL and query string (known as the HTML GET method), but this has severe limitations on length. This new version uses the POST method by submitting a form, which gets rid of the length limits.

Lines 119 through 129 contain the cantInject() function mentioned above. This is the function that's invoked when we determine that the page we're on prevents us from injecting script tags, or if there's an error when we try to load the external function script. This function creates an object to pass to doForm(), containing the URL of the page we're on and the title and any selected text (if they exist).

function cantInject() {
	obj = {
		u: url,
		x: 1
	};
	if (typeof d.title != 'undefined') {
		obj.t = d.title;
		obj.s = d.selection ? d.selection.createRange().text : d.getSelection();
	}
	doForm(obj);
}

OK. We made it all the way to the end of the lgf-postpage-loader.js bookmarklet. Give yourself a hand if you followed along this far.

But there's another part to this software entity, the external script called lgf-postpage-functions.js. Just so we have a sense of closure, I'll post that script now; but I'm not going to do a complete walk-through because I can't even. I'll just tell you that this script basically consists of one function defined as part of a global object named $LGF_Page, whose purpose is to find embedded video, audio and other files that would be nice to include in an LGF Page, and also find the underlying HTML code for any selected text. I'll leave it as an exercise for the coding-inclined to figure out exactly how it does that.

This is loaded as an external file so that it can be updated and modified as needed, when new types of files are identified and new websites added to the list, without making you, dear user, reinstall the bookmarklet for every change.

So without further ado -- the rest of the story.

/*
LGF Pages Bookmarklet Functions
Copyright 2016 Little Green Footballs
All Rights Reserved.

File: lgf-postpage-functions.js
Last revision: 1/1/16 at 5:13 PM
*/
var $LGF_Page = {
	version: '1.4.0',
	getQuery: function(loader) {
		if (typeof loader == 'undefined' || parseFloat(loader) < 1.5) {
			alert('This version of the LGF Pages bookmarklet is out of date! Please reinstall the bookmarklet to get the latest version.');
			return;
		}
		var getSelectionHtml = function() {
				var html = '';
				if (typeof window.getSelection != 'undefined') {
					var sel = window.getSelection();
					if (sel.rangeCount) {
						var container = document.createElement('div');
						for (var i = 0, len = sel.rangeCount; i < len; ++i) {
							container.appendChild(sel.getRangeAt(i).cloneContents());
						}
						html = container.innerHTML;
					}
				} else if (typeof document.selection != 'undefined') {
					if (document.selection.type == 'Text') {
						html = document.selection.createRange().htmlText;
					}
				}
				return html;
			},
			ob,
			video = image = audio = iframe = '',
			url = location.href,
			d = document,
			description = getSelectionHtml();
		if (url.match(/www\.npr\.org/i)) {
			ob = d.getElementsByClassName('download');
			if (ob.length) {
				audio = ob[0].getAttribute('href').split('?')[0];
			}
		} else if (url.match(/^http:\/\/www\.msnbc\.com\/[^\/]*\/watch\//i)) {
			ob = d.querySelectorAll('meta[itemprop="embedURL"]');
			if (ob.length) {
				iframe = ob[0].getAttribute('content');
			}
		} else if (url.match(/^http:\/\/www\.msnbc\.com\//i)) {
			ob = d.querySelectorAll('meta[property="nv:videoId"]');
			if (ob.length) {
				iframe = 'http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=' + ob[0].getAttribute('content');
			}
		} else if (url.match(/^http:\/\/www\.nbcnews\.com\//i)) {
			ob = d.getElementById('embed');
			if (ob) {
				var thediv = d.createElement('div');
				thediv.innerHTML = ob.value;
				iframe = thediv.firstChild.getAttribute('src');
			} else {
				ob = d.querySelectorAll('meta[itemprop="embedUrl"]');
				if (ob.length) {
					iframe = ob[0].getAttribute('content');
				}
			}
		} else if (url.match(/video\.pbs\.org\/video\//i)) {
			iframe = 'http://video.pbs.org/viralplayer/' + url.split('/')[4];
			ob = d.querySelectorAll('meta[property="og:image"]');
			if (ob.length) {
				image = ob[0].getAttribute('content');
			}
		} else if (url.match(/www\.rightwingwatch\.org/i)) {
			ob = d.getElementById('content-area').getElementsByTagName('embed');
			if (ob.length) {
				if (ob[0].getAttribute('src').match(/www\.youtube\.com\/v\//i)) {
					iframe = 'https://www.youtube.com/embed/' + ob[0].getAttribute('src').split('?')[0].split('/')[4];
				}
			} else {
				ob = d.getElementById('content-area').getElementsByTagName('iframe');
				if (ob.length) {
					iframe = ob[0].getAttribute('src');
					if (iframe.substring(0, 2) == '//') {
						iframe = 'http:' + iframe;
					}
				}
			}
		}
		if (iframe == '') {
			var embeds = RegExp([
				'//www\.youtube\.com/embed',
				'player\.vimeo\.',
				'w\.soundcloud\.com/player',
				'www\.scribd\.com/embeds/',
				'media\.mtvnservices\.com/embed/',
				'mediamatters\.org/embed/static/clips/'
			].join('|'), 'i');
			ob = [].slice.call(d.querySelectorAll('iframe'));
			if (ob.length) {
				ob = ob.filter(function(e) {
					return ((e.src.match(embeds) || (e.hasAttribute('data-src') && e.getAttribute('data-src').match(/\/\/www\.youtube\.com\/embed/i))) && !e.classList.contains('gaTrackYouTube'));
				});
				if (ob.length && !url.match(/www\.youtube\.com\/|vimeo\.com\/|soundcloud\.com|www\.scribd\.com/i)) {
					ob = ob[0];
					if (ob.hasAttribute('data-src')) {
						iframe = ob.getAttribute('data-src');
					} else {
						iframe = ob.getAttribute('src');
					}
					if (iframe.substring(0, 2) == '//') {
						iframe = 'http:' + iframe;
					}
				}
			}
		}
		return {
			u: url,
			t: d.title,
			f: iframe,
			m: video,
			a: audio,
			i: image,
			s: description
		};
	}
};