postlight
diff --git a/Diff for: ‎fixtures/www.bloomberg.com/1481135708958.html
+1 b/Diff for: ‎fixtures/www.bloomberg.com/1481135708958.html
+1
diff --git a/Diff for: ‎fixtures/www.bloomberg.com/1481136509532.html
+754 b/Diff for: ‎fixtures/www.bloomberg.com/1481136509532.html
+754
diff --git a/Diff for: ‎fixtures/www.bloomberg.com/1481138014494.html
+1 b/Diff for: ‎fixtures/www.bloomberg.com/1481138014494.html
+1
diff --git a/Diff for: ‎src/extractors/custom/index.js
+1 b/Diff for: ‎src/extractors/custom/index.js
+1
diff --git a/Diff for: ‎src/extractors/custom/www.bloomberg.com/index.js
+74 b/Diff for: ‎src/extractors/custom/www.bloomberg.com/index.js
+74
diff --git a/Diff for: ‎src/extractors/custom/www.bloomberg.com/index.test.js
+241 b/Diff for: ‎src/extractors/custom/www.bloomberg.com/index.test.js
+241
diff --git a/Diff for: ‎src/utils/dom/constants.js
+12-1 b/Diff for: ‎src/utils/dom/constants.js
+12-1
@@ -27,3 +27,4 @@ export * from './www.aol.com';
 export * from './www.youtube.com';
 export * from './www.theguardian.com';
 export * from './www.sbnation.com';
+export * from './www.bloomberg.com';
@@ -0,0 +1,74 @@
+export const WwwBloombergComExtractor = {
+  domain: 'www.bloomberg.com',
+
+  title: {
+    selectors: [
+      // normal article template
+      '.lede-headline',
+
+      // /graphics/ template
+      'h1.article-title',
+
+      // /news/ template
+      'h1.lede-text-only__hed',
+    ],
+  },
+
+  author: {
+    selectors: [
+      ['meta[name="parsely-author"]', 'value'],
+      '.byline-details__link',
+
+      // /graphics/ template
+      '.bydek',
+
+      // /news/ template
+      '.author',
+    ],
+  },
+
+  date_published: {
+    selectors: [
+      ['time.published-at', 'datetime'],
+      ['time[datetime]', 'datetime'],
+      ['meta[name="date"]', 'value'],
+      ['meta[name="parsely-pub-date"]', 'value'],
+    ],
+  },
+
+  dek: {
+    selectors: [
+    ],
+  },
+
+  lead_image_url: {
+    selectors: [
+      ['meta[name="og:image"]', 'value'],
+    ],
+  },
+
+  content: {
+    selectors: [
+      '.article-body__content',
+
+      // /graphics/ template
+      ['section.copy-block'],
+
+      // /news/ template
+      '.body-copy',
+    ],
+
+    // Does anything in the selected content need to be transformed
+    // before it is consumable? E.g., unusual lazy-loaded images
+    transforms: {
+    },
+
+    // Is there anything in the result that shouldn't be there?
+    // Anything matching the clean selectors below is removed from
+    // the result
+    clean: [
+      '.inline-newsletter',
+      '.page-ad',
+    ],
+  },
+};
@@ -0,0 +1,241 @@
+import assert from 'assert';
+import fs from 'fs';
+import URL from 'url';
+import cheerio from 'cheerio';
+
+import Mercury from 'mercury';
+import getExtractor from 'extractors/get-extractor';
+import { excerptContent } from 'utils/text';
+
+describe('WwwBloombergComExtractor', () => {
+  describe('initial test case', () => {
+    let result;
+    let url;
+    beforeAll(() => {
+      url =
+        'http://www.bloomberg.com/politics/articles/2016-12-07/trump-hits-emblem-of-presidential-power-with-air-force-one-tweet';
+      const html =
+        fs.readFileSync('./fixtures/www.bloomberg.com/1481135708958.html');
+      result =
+        Mercury.parse(url, html, { fallback: false });
+    });
+
+    it('is selected properly', () => {
+      // This test should be passing by default.
+      // It sanity checks that the correct parser
+      // is being selected for URLs from this domain
+      const extractor = getExtractor(url);
+      assert.equal(extractor.domain, URL.parse(url).hostname);
+    });
+
+    it('returns the title', async () => {
+      // This test exercises the title selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { title } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(title, 'Air Force One Costs Billions of Dollars Because It’s a Flying White House');
+    });
+
+    it('returns the author', async () => {
+      // This test exercises the author selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { author } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(author, 'Margaret Talev');
+    });
+
+    it('returns the date_published', async () => {
+      // This test exercises the date_published selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { date_published } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(date_published, '2016-12-07T10:00:00.011Z');
+    });
+
+    it('returns the lead_image_url', async () => {
+      // This test exercises the lead_image_url selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { lead_image_url } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(lead_image_url, 'https://assets.bwbx.io/images/users/iqjWHBFdfxIU/ioUAfA1V2nzk/v0/-1x-1.jpg');
+    });
+
+    it('returns the content', async () => {
+      // This test exercises the content selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js,
+      // together with any clean and transform options
+      // configured there.
+      const { content } = await result;
+
+      const $ = cheerio.load(content || '');
+
+      const first13 = excerptContent($('*').first().text(), 13);
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(first13, 'Donald Trump took aim at one of the most visible emblems of the');
+    });
+  });
+
+  describe('/graphics/ template', () => {
+    let result;
+    let url;
+    beforeAll(() => {
+      url =
+        'https://www.bloomberg.com/graphics/2016-apple-profits/';
+      const html =
+        fs.readFileSync('./fixtures/www.bloomberg.com/1481136509532.html');
+      result =
+        Mercury.parse(url, html, { fallback: false });
+    });
+
+    it('is selected properly', () => {
+      // This test should be passing by default.
+      // It sanity checks that the correct parser
+      // is being selected for URLs from this domain
+      const extractor = getExtractor(url);
+      assert.equal(extractor.domain, URL.parse(url).hostname);
+    });
+
+    it('returns the title', async () => {
+      // This test exercises the title selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { title } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(title, 'Americans Are Paying Apple Millions to Shelter Overseas Profits');
+    });
+
+    it('returns the author', async () => {
+      // This test exercises the author selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { author } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(author, 'Andrea Wong');
+    });
+
+    it('returns the date_published', async () => {
+      // This test exercises the date_published selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { date_published } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(date_published, '2016-12-07T10:00:00.000Z');
+    });
+
+    it('returns the lead_image_url', async () => {
+      // This test exercises the lead_image_url selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { lead_image_url } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(lead_image_url, 'https://www.bloomberg.com/graphics/2016-apple-profits/img/2016-apple-profits-facebook.png');
+    });
+
+    it('returns the content', async () => {
+      // This test exercises the content selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js,
+      // together with any clean and transform options
+      // configured there.
+      const { content } = await result;
+
+      const $ = cheerio.load(content || '');
+
+      const first13 = excerptContent($('*').first().text(), 13);
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(first13, 'Over the years, Apple Inc. has become the poster child for U.S. multinationals');
+    });
+  });
+
+  describe('/news/ template', () => {
+    let result;
+    let url;
+    beforeAll(() => {
+      url =
+        'https://www.bloomberg.com/news/articles/2016-12-06/stock-rally-extends-into-asia-as-traders-await-rbi-aussie-gdp';
+      const html =
+        fs.readFileSync('./fixtures/www.bloomberg.com/1481138014494.html');
+      result =
+        Mercury.parse(url, html, { fallback: false });
+    });
+
+    it('is selected properly', () => {
+      // This test should be passing by default.
+      // It sanity checks that the correct parser
+      // is being selected for URLs from this domain
+      const extractor = getExtractor(url);
+      assert.equal(extractor.domain, URL.parse(url).hostname);
+    });
+
+    it('returns the title', async () => {
+      // This test exercises the title selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { title } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(title, 'U.S. Stocks Rise to Records, Bonds Gain on ECB Stimulus Optimism');
+    });
+
+    it('returns the author', async () => {
+      // This test exercises the author selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { author } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(author, 'Jeremy Herron');
+    });
+
+    it('returns the date_published', async () => {
+      // This test exercises the date_published selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { date_published } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(date_published, '2016-12-06T23:22:22.402Z');
+    });
+
+    it('returns the lead_image_url', async () => {
+      // This test exercises the lead_image_url selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js.
+      const { lead_image_url } = await result;
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(lead_image_url, 'https://assets.bwbx.io/javelin/public/images/social-markets-3d32d2f713.jpg');
+    });
+
+    it('returns the content', async () => {
+      // This test exercises the content selectors defined
+      // in ./src/extractors/custom/www.bloomberg.com/index.js,
+      // together with any clean and transform options
+      // configured there.
+      const { content } = await result;
+
+      const $ = cheerio.load(content || '');
+
+      const first13 = excerptContent($('*').first().text(), 13);
+
+      // The expected value below should match the value found
+      // in the article.
+      assert.equal(first13, 'The Dow Jones Industrial Average rose 220 points as U.S. stock indexes powered');
+    });
+  });
+});
@@ -29,7 +29,18 @@ export const STRIP_OUTPUT_TAGS = [
 export const REMOVE_ATTRS = ['style', 'align'];
 export const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);
 export const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
-export const WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
+export const WHITELIST_ATTRS = [ // names are OR-joined into the anchored, case-insensitive WHITELIST_ATTRS_RE
+  'src',
+  'srcset',
+  'href',
+  'class',
+  'id',
+  'alt',
+  'xlink:href', // NOTE(review): presumably kept for SVG references — confirm
+  'width', // NOTE(review): presumably kept so image/graphic dimensions survive — confirm
+  'height',
+];
+
 export const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');
 
 // removeEmpty