@@ -154,15 +154,16 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
154154 }
155155
156156 wfpBuilder .append (String .format ("file=%s,%d,%s\n " , fileMD5 , contents .length , filename ));
157- if (binFile || this .skipSnippets || this .skipSnippets (filename , fileContents )) {
158- return wfpBuilder .toString ();
159- }
160157
161158 String fh2 = WinnowingUtils .calculateOppositeLineEndingHash (contents );
162159 if (fh2 != null ){
163160 wfpBuilder .append (String .format ("fh2=%s\n " ,fh2 ));
164161 }
165162
163+ if (binFile || this .skipSnippets || this .skipSnippets (filename , fileContents )) {
164+ return wfpBuilder .toString ();
165+ }
166+
166167 if (this .isHpsm ()){
167168 wfpBuilder .append (String .format ("hpsm=%s\n " , Hpsm .calcHpsm (contents )));
168169 }
@@ -194,11 +195,7 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
194195 if (lastLine != line ) {
195196 int obLength = outputBuilder .length ();
196197 if (obLength > 0 ) {
197- if (snippetLimit > 0 && obLength > snippetLimit ) {
198- log .debug ("Skipping snippet line as it's too big ({}): {}" , filename , outputBuilder );
199- } else {
200- wfpBuilder .append (outputBuilder ).append ("\n " );
201- }
198+ wfpBuilder .append (outputBuilder ).append ("\n " );
202199 }
203200 outputBuilder .delete (0 , obLength );
204201 outputBuilder .append (String .format ("%d=%s" , line , minHashHex ));
@@ -216,11 +213,7 @@ public String wfpForContents(@NonNull String filename, Boolean binFile, byte[] c
216213 }
217214 int obLength = outputBuilder .length ();
218215 if (obLength > 0 ) {
219- if (snippetLimit > 0 && obLength > snippetLimit ) {
220- log .debug ("Skipping snippet line as it's too big ({}) {} - {}: {}" , filename , snippetLimit , obLength , outputBuilder );
221- } else {
222- wfpBuilder .append (outputBuilder ).append ("\n " );
223- }
216+ wfpBuilder .append (outputBuilder ).append ("\n " );
224217 }
225218 return wfpBuilder .toString ();
226219 }
@@ -288,6 +281,21 @@ private Boolean skipSnippets(@NonNull String filename, char[] contents) {
288281 }
289282 }
290283 }
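284+ // Check if the first line is too long (matches Python implementation). NOTE(review): a '\n' at index 0 leaves firstLineEnd == 0, so the fallback below measures the whole content rather than an empty first line - confirm this is intended.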
285+ int firstLineEnd = 0 ;
286+ for (int i = 0 ; i < contents .length ; i ++) {
287+ if (contents [i ] == '\n' ) {
288+ firstLineEnd = i ;
289+ break ;
290+ }
291+ }
292+ if (firstLineEnd == 0 ) {
293+ firstLineEnd = contents .length ; // No newline found, use entire content length
294+ }
295+ if (snippetLimit > 0 && firstLineEnd > snippetLimit ) {
296+ log .trace ("Skipping snippets due to first line being too long: {} - {} chars" , filename , firstLineEnd );
297+ return true ;
298+ }
291299 return false ;
292300 }
293301
0 commit comments