diff --git a/lib/screens/article_website.dart b/lib/screens/article_website.dart index 0400af26..9f110e14 100644 --- a/lib/screens/article_website.dart +++ b/lib/screens/article_website.dart @@ -678,21 +678,25 @@ class ArticleWebsiteState extends State { try { pdfLink = await controller.evaluateJavascript(source: r""" - var directPdfLink = document.querySelector('a[href*="/doi/pdfdirect/"][href*="?download=true"]'); - if (directPdfLink) { - return directPdfLink.href; - } - - var link = document.querySelector('a.read-link[data-track-action="Download PDF"], a[data-track-label="PdfLink"], a.pdf-link, a[href*="/doi/pdf/"]'); - if (link) { - return link.href; - } - link = document.querySelector('a[aria-label*="Download PDF"]'); - if (link) { - return link.href; - } - return null; - """) as String?; + (function() { + + var apsButton = document.querySelector('div.right a.sm-primary-button[href*="/pdf/"]'); + if (apsButton) return apsButton.href; + var apsJournalLink = document.querySelector('a[href^="/prl/pdf/"], a[href^="/pra/pdf/"], a[href^="/prb/pdf/"], a[href^="/pre/pdf/"], a[href^="/prx/pdf/"]'); + if (apsJournalLink) return apsJournalLink.href; + + var directPdfLink = document.querySelector('a[href*="/doi/pdfdirect/"][href*="?download=true"]'); + if (directPdfLink) return directPdfLink.href; + + var link = document.querySelector('a.read-link[data-track-action="Download PDF"], a[data-track-label="PdfLink"], a.pdf-link, a[href*="/doi/pdf/"]'); + if (link) return link.href; + + var ariaLink = document.querySelector('a[aria-label*="Download PDF"]'); + if (ariaLink) return ariaLink.href; + + return null; + })(); + """) as String?; if (pdfLink == null || pdfLink.isEmpty) { const int retries = 10; @@ -939,8 +943,15 @@ class ArticleWebsiteState extends State { logger.info( 'Full HTTP Request Headers being sent: $headers'); + final client = http.Client(); + + final request = http.Request('GET', finalDownloadUri) + ..headers.addAll(headers) + ..followRedirects = true; + + final streamedResponse = await client.send(request); final response = - await http.get(finalDownloadUri, headers: headers); + await http.Response.fromStream(streamedResponse); if (!mounted) { logger.warning( diff --git a/lib/services/abstract_scraper.dart b/lib/services/abstract_scraper.dart index 701b6e1d..d8b4fd1f 100644 --- a/lib/services/abstract_scraper.dart +++ b/lib/services/abstract_scraper.dart @@ -92,6 +92,13 @@ class AbstractScraper { } } } + if (!output.abstract) { + let sectionAbstract = document.getElementById('abstract') || document.querySelector('section.abstract'); + if (sectionAbstract) { + let inner = sectionAbstract.querySelector('.content, .abstract-text, .wrapper'); + output.abstract = extractFullText(inner || sectionAbstract); + } + } let abstractDiv = [...document.querySelectorAll('div, section')] .find(el => /abstract/i.test(el.className) || /abstract/i.test(el.id));