Skip to content

Commit 16eaa0c

Browse files
committed
bug:SP-3626 Fixes winnowing parsing issue
1 parent d3e0756 commit 16eaa0c

File tree

4 files changed

+31
-18
lines changed

4 files changed

+31
-18
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- Upcoming changes...
13+
14+
## [0.12.0] - 2025-11-04
1315
### Added
1416
- `calculateOppositeLineEndingHash()` method in `WinnowingUtils` to compute hash with opposite line endings (Unix ↔ Windows)
1517
- FH2 hash included in WFP output format as `fh2=<hash>`
1618
- Support for detecting CRLF (Windows), LF (Unix), and CR (legacy Mac) line endings
19+
### Fixed
20+
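- Fixed WFP parsing issue: `fh2=<hash>` is now written before the skip-snippets check (so it is also included for binary and snippet-skipped files), and snippet generation is skipped for the whole file when its first line exceeds the snippet limit, instead of dropping individual oversized snippet lines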
1721

1822
## [0.11.0] - 2025-05-26
1923
### Added
@@ -126,4 +130,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
126130
[0.9.0]: https://github.com/scanoss/scanoss.java/compare/v0.8.1...v0.9.0
127131
[0.10.0]: https://github.com/scanoss/scanoss.java/compare/v0.9.0...v0.10.0
128132
[0.10.1]: https://github.com/scanoss/scanoss.java/compare/v0.10.0...v0.10.1
129-
[0.11.0]: https://github.com/scanoss/scanoss.java/compare/v0.10.1...v0.11.0
133+
[0.11.0]: https://github.com/scanoss/scanoss.java/compare/v0.10.1...v0.11.0
134+
[0.12.0]: https://github.com/scanoss/scanoss.java/compare/v0.11.0...v0.12.0

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>com.scanoss</groupId>
88
<artifactId>scanoss</artifactId>
9-
<version>0.11.0</version>
9+
<version>0.12.0</version>
1010
<packaging>jar</packaging>
1111
<name>scanoss.java</name>
1212
<url>https://github.com/scanoss/scanoss.java</url>

src/main/java/com/scanoss/Winnowing.java

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,16 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
154154
}
155155

156156
wfpBuilder.append(String.format("file=%s,%d,%s\n", fileMD5, contents.length, filename));
157-
if (binFile || this.skipSnippets || this.skipSnippets(filename, fileContents)) {
158-
return wfpBuilder.toString();
159-
}
160157

161158
String fh2 = WinnowingUtils.calculateOppositeLineEndingHash(contents);
162159
if (fh2 != null){
163160
wfpBuilder.append(String.format("fh2=%s\n",fh2));
164161
}
165162

163+
if (binFile || this.skipSnippets || this.skipSnippets(filename, fileContents)) {
164+
return wfpBuilder.toString();
165+
}
166+
166167
if(this.isHpsm()){
167168
wfpBuilder.append(String.format("hpsm=%s\n", Hpsm.calcHpsm(contents)));
168169
}
@@ -194,11 +195,7 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
194195
if (lastLine != line) {
195196
int obLength = outputBuilder.length();
196197
if (obLength > 0) {
197-
if (snippetLimit > 0 && obLength > snippetLimit) {
198-
log.debug("Skipping snippet line as it's too big ({}): {}", filename, outputBuilder);
199-
} else {
200-
wfpBuilder.append(outputBuilder).append("\n");
201-
}
198+
wfpBuilder.append(outputBuilder).append("\n");
202199
}
203200
outputBuilder.delete(0, obLength);
204201
outputBuilder.append(String.format("%d=%s", line, minHashHex));
@@ -216,11 +213,7 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
216213
}
217214
int obLength = outputBuilder.length();
218215
if (obLength > 0) {
219-
if (snippetLimit > 0 && obLength > snippetLimit) {
220-
log.debug("Skipping snippet line as it's too big ({}) {} - {}: {}", filename, snippetLimit, obLength, outputBuilder);
221-
} else {
222-
wfpBuilder.append(outputBuilder).append("\n");
223-
}
216+
wfpBuilder.append(outputBuilder).append("\n");
224217
}
225218
return wfpBuilder.toString();
226219
}
@@ -288,6 +281,21 @@ private Boolean skipSnippets(@NonNull String filename, char[] contents) {
288281
}
289282
}
290283
}
284+
// Check if first line is too long (matches Python implementation)
285+
int firstLineEnd = 0;
286+
for (int i = 0; i < contents.length; i++) {
287+
if (contents[i] == '\n') {
288+
firstLineEnd = i;
289+
break;
290+
}
291+
}
292+
if (firstLineEnd == 0) {
293+
firstLineEnd = contents.length; // No newline found, use entire content length
294+
}
295+
if (snippetLimit > 0 && firstLineEnd > snippetLimit) {
296+
log.trace("Skipping snippets due to first line being too long: {} - {} chars", filename, firstLineEnd);
297+
return true;
298+
}
291299
return false;
292300
}
293301

src/test/java/com/scanoss/TestWinnowing.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,13 +165,13 @@ public void TestWinnowingFileSkipSnippets() {
165165
String wfp = winnowing.wfpForFile(file, file);
166166
log.info("WFP for Json: {}", wfp);
167167
assertNotNull("Expected a result from WFP", wfp);
168-
assertEquals("file=f8d52217f24ea77ff80a6b1f421d0959,229084,testing/data/non-source.json", wfp.trim());
168+
assertEquals("file=f8d52217f24ea77ff80a6b1f421d0959,229084,testing/data/non-source.json\nfh2=dcae9929f4436808df739f19804cb4d2", wfp.trim());
169169

170170
file = "testing/data/test-file.txt";
171171
wfp = winnowing.wfpForFile(file, file);
172172
log.info("WFP for Json: {}", wfp);
173173
assertNotNull("Expected a result from WFP", wfp);
174-
assertEquals("file=e3dd1a7915d51c8cd1498585e6cea41e,183,testing/data/test-file.txt", wfp.trim());
174+
assertEquals("file=e3dd1a7915d51c8cd1498585e6cea41e,183,testing/data/test-file.txt\nfh2=7de74202074d60759e60f408391e70c4", wfp.trim());
175175

176176
file = "testing/data/too-small.c";
177177
wfp = winnowing.wfpForFile(file, file);
@@ -183,7 +183,7 @@ public void TestWinnowingFileSkipSnippets() {
183183
wfp = winnowing.wfpForFile(file, file);
184184
log.info("WFP for Json: {}", wfp);
185185
assertNotNull("Expected a result from WFP", wfp);
186-
assertEquals("file=d7cfce9cff6d109c6b0249233ee26368,345,testing/data/json-file.c", wfp.trim());
186+
assertEquals("file=d7cfce9cff6d109c6b0249233ee26368,345,testing/data/json-file.c\nfh2=56c80b467d3bcc13da74943a82c69724", wfp.trim());
187187

188188
file = "testing/data/source-file-with-long-line.c";
189189
wfp = winnowing.wfpForFile(file, file);

0 commit comments

Comments
 (0)