diff --git a/.gitignore b/.gitignore index 93f1361..bdba0b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ node_modules npm-debug.log +.idea +*.iml \ No newline at end of file diff --git a/README.md b/README.md index 3e1c7dc..f62aa44 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ Content sanitization enabled: { "byline":"Nicolas Perriault —", "content":"

So finally you're testing", + "language": "en", "length":2867, "title":"Get your Frontend JavaScript Code Covered | Code", "uri":"https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/", @@ -77,6 +78,7 @@ Content sanitization disabled (default): { "byline":"Nicolas Perriault —", "content":"

\n

So finally you're…", + "language": "en", "length":3851, "title":"Get your Frontend JavaScript Code Covered | Code", "uri":"https://nicolas.perriault.net/code/2013/get-your-frontend-javascript-code-covered/", diff --git a/phantom-scrape.js b/phantom-scrape.js index d1c1ce5..232dcec 100644 --- a/phantom-scrape.js +++ b/phantom-scrape.js @@ -23,6 +23,31 @@ function outputJSON(object) { */ function runReadability(url, userAgent, pageContent) { var location = document.location; + var getLanguage = function(document) { + var lang = document.documentElement.lang; + if (lang !== undefined) { + return lang; + } + var metas = document.getElementsByTagName('meta'); + for (var i=0; i