From 749e993c85f4c936e19cb0f31af07bf345e299dc Mon Sep 17 00:00:00 2001 From: qingzhou Date: Wed, 16 Aug 2017 15:31:23 -0700 Subject: [PATCH] Fix: Avoid analyzing `/favicon.ico` multiple times Fix #427 --- src/lib/connectors/jsdom/jsdom.ts | 2 +- .../shared/remote-debugging-connector.ts | 49 ++++++- tests/lib/connectors/collect.ts | 126 ++++++++++++++++++ .../fixtures/common/favicon-32x32.png | Bin 0 -> 719 bytes .../connectors/fixtures/common/favicon.ico | Bin 0 -> 5430 bytes 5 files changed, 170 insertions(+), 7 deletions(-) create mode 100644 tests/lib/connectors/collect.ts create mode 100644 tests/lib/connectors/fixtures/common/favicon-32x32.png create mode 100644 tests/lib/connectors/fixtures/common/favicon.ico diff --git a/src/lib/connectors/jsdom/jsdom.ts b/src/lib/connectors/jsdom/jsdom.ts index 1c0da60ed58..2bc5f6210f7 100644 --- a/src/lib/connectors/jsdom/jsdom.ts +++ b/src/lib/connectors/jsdom/jsdom.ts @@ -226,7 +226,7 @@ class JSDOMConnector implements IConnector { * * uses `favicon.ico` and the final url after redirects. */ private async getFavicon(element?: HTMLElement) { - const href = element ? element.getAttribute('href') : '/favicon.ico'; + const href = (element && element.getAttribute('href')) || '/favicon.ico'; try { await util.promisify(this.resourceLoader).call(this, { element, url: url.parse(href) }); diff --git a/src/lib/connectors/shared/remote-debugging-connector.ts b/src/lib/connectors/shared/remote-debugging-connector.ts index 994c6bcef25..1ae1ca28bcd 100644 --- a/src/lib/connectors/shared/remote-debugging-connector.ts +++ b/src/lib/connectors/shared/remote-debugging-connector.ts @@ -64,6 +64,7 @@ export class Connector implements IConnector { private _tabs = []; /** Tells if the page has specified a manifest or not. */ private _manifestIsSpecified: boolean = false; + /** Tells if a favicon of a page has been downloaded from a link tag */ private _faviconLoaded: boolean = false; private _targetNetworkData: INetworkData; @@ -151,6 +152,21 @@ export class Connector implements IConnector { return null; } + /** Check if a request or response is to or from `/favicon.ico` */ + private rootFaviconRequestOrResponse(params) { + if (!this._finalHref) { + return false; + } + const faviconUrl = url.resolve(this._finalHref, '/favicon.ico'); + const event = params.request || params.response; + + if (!event) { + return false; + } + + return this._finalHref && faviconUrl === event.url; + } + /** Event handler for when the browser is about to make a request. */ private async onRequestWillBeSent(params) { const requestUrl: string = params.request.url; @@ -178,7 +194,14 @@ export class Connector implements IConnector { const eventName: string = this._href === requestUrl ? 'targetfetch::start' : 'fetch::start'; debug(`About to start fetching ${requestUrl}`); - await this._server.emitAsync(eventName, { resource: requestUrl }); + + /* `getFavicon` will make attempts to download favicon later. + * Ignore `cdp` requests to download favicon from the root + * to avoid emitting duplidate events. + */ + if (!this.rootFaviconRequestOrResponse(params)) { + await this._server.emitAsync(eventName, { resource: requestUrl }); + } } /** Event handler fired when HTTP request fails for some reason. */ @@ -227,7 +250,13 @@ export class Connector implements IConnector { this._requests.delete(params.requestId); - await this._server.emitAsync(eventName, event); + /* `getFavicon` will make attempts to download favicon later. + * Ignore `cdp` requests to download favicon from the root + * to avoid emitting duplidate events. + */ + if (!this.rootFaviconRequestOrResponse(params)) { + await this._server.emitAsync(eventName, event); + } } private async getResponseBody(cdpResponse) { @@ -330,7 +359,13 @@ export class Connector implements IConnector { this._faviconLoaded = true; } - await this._server.emitAsync(eventName, data); + /* `getFavicon` will make attempts to download favicon later. + * Ignore `cdp` requests to download favicon from the root + * to avoid emitting duplidate events. + */ + if (!this.rootFaviconRequestOrResponse(params)) { + await this._server.emitAsync(eventName, data); + } /* We don't need to store the request anymore so we can remove it and ignore it * if we receive it in `onLoadingFailed` (used only for "catastrophic" failures). @@ -487,8 +522,8 @@ export class Connector implements IConnector { * * uses the `src` attribute of `` if present. * * uses `favicon.ico` and the final url after redirects. */ - private async getFavicon(element?: AsyncHTMLElement) { - const href = element ? element.getAttribute('href') : '/favicon.ico'; + private async getFavicon(element: AsyncHTMLElement) { + const href = (element && element.getAttribute('href')) || '/favicon.ico'; try { debug(`resource ${href} to be fetched`); @@ -545,7 +580,9 @@ export class Connector implements IConnector { } if (!this._faviconLoaded) { - await this.getFavicon((await this._dom.querySelectorAll('link[rel~="icon"]'))[0]); + const faviconElement = (await this._dom.querySelectorAll('link[rel~="icon"]'))[0]; + + await this.getFavicon(faviconElement); } await this._server.emitAsync('scan::end', event); diff --git a/tests/lib/connectors/collect.ts b/tests/lib/connectors/collect.ts new file mode 100644 index 00000000000..b49a1b69b82 --- /dev/null +++ b/tests/lib/connectors/collect.ts @@ -0,0 +1,126 @@ +/* eslint-disable no-sync */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as url from 'url'; +import * as sinon from 'sinon'; + +import test from 'ava'; + +import { builders } from '../../helpers/connectors'; +import { createServer } from '../../helpers/test-server'; +import { IConnector, IConnectorBuilder, INetworkData } from '../../../src/lib/types'; // eslint-disable-line no-unused-vars +import { generateHTMLPage } from '../../helpers/misc'; + +test.beforeEach(async (t) => { + const sonar = { + emit() { }, + emitAsync() { } + }; + + const server = createServer(); + + await server.start(); + + sinon.spy(sonar, 'emit'); + sinon.spy(sonar, 'emitAsync'); + + t.context = { + server, + sonar + }; +}); + +test.afterEach.always((t) => { + t.context.server.stop(); + t.context.sonar.emit.restore(); + t.context.sonar.emitAsync.restore(); +}); + +const pathToFaviconInDir = path.join(__dirname, './fixtures/common/favicon.ico'); +const pathToFaviconInLinkElement = path.join(__dirname, './fixtures/common/favicon-32x32.png'); + +const runTest = async (t, connectorBuilder, serverConfig?) => { + const { sonar } = t.context; + const connector: IConnector = await (connectorBuilder)(sonar, {}); + const server = t.context.server; + + if (serverConfig) { + server.configure(serverConfig); + } + + await connector.collect(url.parse(`http://localhost:${server.port}/`)); + await connector.close(); +}; + +const testConnectorCollect = (connectorInfo) => { + const connectorBuilder: IConnectorBuilder = connectorInfo.builder; + const name: string = connectorInfo.name; + + test.serial(`[${name}] Favicon is present in a 'link' element with 'rel' attribute set to 'icon' `, async (t) => { + const faviconInLinkElementDir = `http://localhost:${t.context.server.port}/images/favicon-32x32.png`; + const serverConfig = { + '/': generateHTMLPage(``), + '/images/favicon-favicon-32x32.png': fs.readFileSync(pathToFaviconInLinkElement) + }; + + await runTest(t, connectorBuilder, serverConfig); + + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').callCount, 1); + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').args[0][1].request.url, faviconInLinkElementDir); + + }); + + test.serial(`[${name}] Favicon is present in the root directory`, async (t) => { + const faviconInRootDir = `http://localhost:${t.context.server.port}/favicon.ico`; + const serverConfig = { '/favicon.ico': fs.readFileSync(pathToFaviconInDir) }; + + await runTest(t, connectorBuilder, serverConfig); + + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').callCount, 1); + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').args[0][1].request.url, faviconInRootDir); + }); + + test.serial(`[${name}] Favicon is present in both the root directory and the 'link' element`, async (t) => { + const faviconInLinkElementDir = `http://localhost:${t.context.server.port}/images/favicon-32x32.png`; + const serverConfig = { + '/': generateHTMLPage(``), + '/favicon.ico': fs.readFileSync(pathToFaviconInDir), + '/images/favicon-favicon-32x32.png': fs.readFileSync(pathToFaviconInLinkElement) + }; + + await runTest(t, connectorBuilder, serverConfig); + + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').callCount, 1); + // Should load favicon from the link element if it exists + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').args[0][1].request.url, faviconInLinkElementDir); + }); + + test.serial(`[${name}] Favicon is present in both the root directory and the 'link' element, but the 'link' element has empty 'href'`, async (t) => { + const faviconInRootDir = `http://localhost:${t.context.server.port}/favicon.ico`; + const serverConfig = { + '/': generateHTMLPage(``), + '/favicon.ico': fs.readFileSync(pathToFaviconInDir) + }; + + await runTest(t, connectorBuilder, serverConfig); + + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').callCount, 1); + // Should load favicon from the root even though the link element exists because 'href' is empty. + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').args[0][1].request.url, faviconInRootDir); + }); + + test.serial(`[${name}] Favicon is not present in either the root directory or the 'link' element`, async (t) => { + const faviconInRootDir = `http://localhost:${t.context.server.port}/favicon.ico`; + + await runTest(t, connectorBuilder); + + // Requests to `/favicon.ico` are always sent when favicon doesn't exist as a `link` tag in the html. + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').callCount, 1); + t.is(t.context.sonar.emitAsync.withArgs('fetch::end').args[0][1].request.url, faviconInRootDir); + }); +}; + +builders.forEach((connector) => { + testConnectorCollect(connector); +}); diff --git a/tests/lib/connectors/fixtures/common/favicon-32x32.png b/tests/lib/connectors/fixtures/common/favicon-32x32.png new file mode 100644 index 0000000000000000000000000000000000000000..951e6b7eb338d6426d947337cdb51456baf69e21 GIT binary patch literal 719 zcmV;=0xQDWab^E&G zpBq=~!Q3vl`I|PT>0g)6*`rj-_qaf@439F|tlu!e)rR_XDI#6?>WMZ>ien=1D%BkfHF7^ zfycdCy~rcIas8#Y`9GukzADB*ry&BU4TfWV-*zv%y?*HmVbWXM*DH~4C?f&#X`e{wba(ayOCnYIX!<#wZZls^7{o%1^~>b{ zOw_X{&cC=HPJ2Ak{vGN+1Sr5DS0M~2!YueO1zC;$;=xLoi`kX)J?=DTq9p^1-*LRsC!aaIh(O z<~WKhnD68R}2{ska6}BuWH}%EJHv002ovPDHLkV1j<0 BVV(d0 literal 0 HcmV?d00001 diff --git a/tests/lib/connectors/fixtures/common/favicon.ico b/tests/lib/connectors/fixtures/common/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..aa08eac4d8dc24acff2e260a8b79e43f3c9a391d GIT binary patch literal 5430 zcmeHLe`s4(6u!ogl~K|%LWo1QAW}-%AN@m0(SfDZp`*iEL0VDl?5tx-DP`!y;zC

mCRSX`&^p;pGh-!;F{g8B?1uaxWTlXy3ymc?8r`3wC~UPs zPKqWm7_tC`DJYOd(PR{>6j`wF?vcvy8}Gj7tEfE*OaWdMBho)zIll3qaP1cLht-s{f@rCQ!kF7e+IG&v>_w= z#Y0ER$zHtbv)u4rC>8@jQ}W6z^{v8>KR*oHfPuZh4~=cev=>+p8*I;Ku%8Y*-F#4v z#VwiaW#NWtq35F=u;+Aq^!T4>oJ>srZqH;-a`>=BKRI53wR8dfDq68_Rld#7Oo!U` zcI%x3BcIx{__vex;P&=b?(-9dF=+aFE#_~M_?TJm*`1X!y#GrOiWr+&1KiJ8w+az0 zK?alu{@2xPD9<>7DuD7)rr({N)9TA<_vU~1?1{7xrJP;>SSgnQ4jquZP>S*^@(xQ; z&WxO914^TxenC0C9%u!6fX{Sr0J2@&aZeT509NFaTkf^nojZx+&B*z7@&kW#Z_=F^ z0d(&j9l4;|wDKoTl}ZMAJv|o0YvI2d*#!Ge;IfYIY-7;$Jcyyv=x)(sptVA+K=+(` z1$Z@nYESdSVew%pBU#QJhYvn}J6dOD1 z0Jd@i!@IyUp>oZlJshZgseQiX&0+L|syr8NvY7(EL*u2EYB%2fwH43kjHT~2)m#&y z_RjkF__-4FzXWpSCOZK&T6yXoP)z~(7J8P!cOuyMnuj|&Xb-*K@d9-8YLDf_R7W+e zwg%ug!>22So9eT1;9`nw5;f{3+8@Z$*JwH1q}!mw-vIdl(C@El+(UI@q_g*Vj1Q~w zcfkK3_-UOk0n6n~)xVh^`yV~xUI#dV4R}WiXl+n;nlZMiaz}jP2g?faZ%v)Qwh!5~ z;IF~lr}LQq-_gEfrBvf(!jvi{boA`zXRNKzohQx?fSG{l*hZV zmu30Xw)hsaM7j>u!dIqm2)$U^p-(Z7hIe;$NZ|!*w)54~Lk#O8UA-^E-!Fh3U%HI$ z>z`CzU#QJhjc>fXY6I~qV(9Fpvq*X4G%y2DuGF{%^y%BNm+K!anR~Wn5Pg!-^OHFL zIiCS-O~h+HvoPhg0cF6WKsB&MMGe{tU_ELkOX9-CvbnnE4sg?5wmARlo~Gs00~%*O zZQ{2-Y=Lb}o5BAf@cn%AZik#mo9e zQRL(2p6%W9?XrsZ3Zd!z#5IcIVv5Yz<04}l2|^Vs`mPBmXGJc++s2BN$W7R&2<)gL SvJYUm4_>bWQ62C9iT(xo(IzGU literal 0 HcmV?d00001