Skip to content

Commit ec7def0

Browse files
committed
feat(diff-identity): dedupe by comparing to identity of parent property
1 parent 35efea5 commit ec7def0

File tree

2 files changed

+145
-2
lines changed

2 files changed

+145
-2
lines changed

helper/diffPlaces.js

+56-2
Original file line numberDiff line numberDiff line change
@@ -124,20 +124,72 @@ function isNameDifferent(item1, item2, requestLanguage){
124124
// iterate over all the languages in item2, comparing them to the
125125
// 'default' name of item1 and also against the language requested by the user.
126126
for( let lang in names2 ){
127+
if (!names2.hasOwnProperty(lang)) { continue; } // do not iterate over inherited properties
127128
if( !isPropertyDifferent({[lang]: names1.default}, names2, lang) ){ return false; }
128129
if( requestLanguage && !isPropertyDifferent({[lang]: names1[requestLanguage]}, names2, lang) ){ return false; }
129130
}
130131

131-
// iterate over all the languages in item1, comparing them to the
132-
// 'default' name of item2 and also against the language requested by the user.
132+
// as above, but inverse
133133
for( let lang in names1 ){
134+
if (!names1.hasOwnProperty(lang)) { continue; } // do not iterate over inherited properties
134135
if( !isPropertyDifferent({[lang]: names2.default}, names1, lang) ){ return false; }
135136
if( requestLanguage && !isPropertyDifferent({[lang]: names2[requestLanguage]}, names1, lang) ){ return false; }
136137
}
137138

138139
return true;
139140
}
140141

142+
// convenience function to return a GID (or null in case of an error)
143+
// note: supports scalar values else takes the first element of an Array.
144+
const GID = (item) => {
145+
const parts = _.compact([
146+
_.first(_.castArray(_.get(item, 'source'))),
147+
_.first(_.castArray(_.get(item, 'layer'))),
148+
_.first(_.castArray(_.get(item, 'source_id')))
149+
]);
150+
151+
return (parts.length === 3) ? parts.join(':') : null;
152+
};
153+
154+
// convenience function to return a *parent GID (or null in case of an error)
155+
// note: supports scalar values else takes the first element of an Array.
156+
// note: defaults the 'source' to 'whosonfirst' for backwards compatibility
157+
const parentGID = (item, layer) => {
158+
const source = _.first(_.compact(_.castArray(_.get(item, ['parent', `${layer}_source`]))));
159+
const source_id = _.get(item, ['parent', `${layer}_id`]);
160+
161+
return GID({ layer, source: (source || 'whosonfirst'), source_id });
162+
};
163+
164+
/**
165+
* Compare the item properties and return true for any of the following predicates, else false
166+
* - IsEqual -- The two items are save the same source, source_id and layer
167+
* - IsEquivalant -- The two items are otherwise equalivant (as per corresponding code comments)
168+
*/
169+
function isEquivalentIdentity(item1, item2) {
170+
171+
// Both items must be on the same layer
172+
if (item1.layer !== item2.layer) { return false; }
173+
174+
// Generate a GID value for each item
175+
const gid1 = GID(item1);
176+
const gid2 = GID(item2);
177+
178+
// Invalid GID(s)
179+
if (gid1 === null || gid2 === null) { return false; }
180+
181+
// Equal GIDs
182+
if (gid1 === gid2) { return true; }
183+
184+
// Equivalant GID ($item1 == $item2.parent[$item1.layer])
185+
if (gid1 === parentGID(item2, item1.layer)) { return true; }
186+
187+
// Equivalant GID ($item2 = $item1.parent[$item2.layer])
188+
if (gid2 === parentGID(item1, item2.layer)) { return true; }
189+
190+
return false;
191+
}
192+
141193
/**
142194
* Compare the address_parts properties if they exist.
143195
* Returns false if the objects are the same, else true.
@@ -174,6 +226,7 @@ function isAddressDifferent(item1, item2){
174226
* Optionally provide $requestLanguage (req.clean.lang.iso6393) to improve name deduplication.
175227
*/
176228
function isDifferent(item1, item2, requestLanguage){
229+
if( isEquivalentIdentity( item1, item2 ) ){ return false; }
177230
if( isLayerDifferent( item1, item2 ) ){ return true; }
178231
if( isParentHierarchyDifferent( item1, item2 ) ){ return true; }
179232
if( isNameDifferent( item1, item2, requestLanguage ) ){ return true; }
@@ -245,3 +298,4 @@ module.exports.isDifferent = isDifferent;
245298
module.exports.layerPreferences = layerPreferences;
246299
module.exports.isNameDifferent = isNameDifferent;
247300
module.exports.normalizeString = normalizeString;
301+
module.exports.isEquivalentIdentity = isEquivalentIdentity;

test/unit/helper/diffPlaces.js

+89
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
const isDifferent = require('../../../helper/diffPlaces').isDifferent;
22
const isNameDifferent = require('../../../helper/diffPlaces').isNameDifferent;
33
const normalizeString = require('../../../helper/diffPlaces').normalizeString;
4+
const isEquivalentIdentity = require('../../../helper/diffPlaces').isEquivalentIdentity;
45

56
module.exports.tests = {};
67

@@ -539,6 +540,94 @@ module.exports.tests.isNameDifferent = function (test, common) {
539540
});
540541
};
541542

543+
module.exports.tests.isEquivalentIdentity = function (test, common) {
544+
test('basic equivalence', function (t) {
545+
t.true(isEquivalentIdentity(
546+
{ source: 'test', source_id: '1', layer: 'example' },
547+
{ source: 'test', source_id: '1', layer: 'example' }
548+
), 'same source, source_id and layer');
549+
550+
t.false(isEquivalentIdentity(
551+
{ source: 'test', source_id: '1', layer: 'example' },
552+
{ source: 'foo', source_id: '1', layer: 'example' }
553+
), 'different source');
554+
555+
t.false(isEquivalentIdentity(
556+
{ source: 'test', source_id: '1', layer: 'example' },
557+
{ source: 'test', source_id: '2', layer: 'example' }
558+
), 'different source_id');
559+
560+
// two records sharing the same source, source_id but with
561+
// differing layer are considered not equal.
562+
// in practice this should be rare/never happen but if it
563+
// becomes an issue would could consider making this stricter.
564+
t.false(isEquivalentIdentity(
565+
{ source: 'test', source_id: '1', layer: 'example' },
566+
{ source: 'test', source_id: '1', layer: 'foo' }
567+
), 'same source, source_id and layer');
568+
569+
// if either record fails to generate a valid GID then they are
570+
// considered not equivalent.
571+
t.false(isEquivalentIdentity(
572+
{ source: 'test', source_id: '1' },
573+
{ source: 'test', source_id: '1', layer: 'example' }
574+
), 'invalid GID');
575+
576+
t.end();
577+
});
578+
579+
test('equivalence via parent hierarchy', function (t) {
580+
t.true(isEquivalentIdentity(
581+
{ source: 'foo', source_id: '1', layer: 'example' },
582+
{ source: 'bar', source_id: '2', layer: 'example', parent: {
583+
example_id: '1',
584+
example_source: 'foo'
585+
}
586+
}), 'match parent GID');
587+
588+
t.false(isEquivalentIdentity(
589+
{ source: 'foo', source_id: '1', layer: 'example' },
590+
{ source: 'bar', source_id: '2', layer: 'example', parent: {
591+
foo_id: '1',
592+
foo_source: 'foo'
593+
}
594+
}), 'parent name must be from same layer');
595+
596+
t.false(isEquivalentIdentity(
597+
{ source: 'foo', source_id: '1', layer: 'example' },
598+
{ source: 'bar', source_id: '2', layer: 'example', parent: {
599+
foo_id: '1',
600+
example_source: 'foo'
601+
}
602+
}), 'parent name must have the same source_id');
603+
604+
t.false(isEquivalentIdentity(
605+
{ source: 'foo', source_id: '1', layer: 'example' },
606+
{ source: 'bar', source_id: '2', layer: 'example', parent: {
607+
example_id: '1',
608+
example_source: 'not_foo'
609+
}
610+
}), 'parent name must have the same source');
611+
612+
t.true(isEquivalentIdentity(
613+
{ source: 'whosonfirst', source_id: '1', layer: 'example' },
614+
{ source: 'test', source_id: '2', layer: 'example', parent: {
615+
example_id: '1'
616+
}
617+
}), 'parent source defaults to "whosonfirst"');
618+
619+
t.true(isEquivalentIdentity(
620+
{ source: 'whosonfirst', source_id: '1', layer: 'example' },
621+
{ source: 'test', source_id: '2', layer: 'example', parent: {
622+
example_id: ['1'],
623+
example_source: [null]
624+
}
625+
}), 'parent source defaults to "whosonfirst" (arrays)');
626+
627+
t.end();
628+
});
629+
};
630+
542631
module.exports.tests.normalizeString = function (test, common) {
543632
test('lowercase', function (t) {
544633
t.equal(normalizeString('Foo Bar'), 'foo bar');

0 commit comments

Comments
 (0)