@@ -1174,7 +1174,7 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA
1174
1174
foreach ( int location in cleavageMotifLocations )
1175
1175
{
1176
1176
char [ ] motifArray = BaseSequence . Substring ( location , cleavingMotif . Length ) . ToCharArray ( ) ;
1177
-
1177
+
1178
1178
for ( int i = 0 ; i < cleavingMotif . Length ; i ++ )
1179
1179
{
1180
1180
newBase [ location + i ] = motifArray [ i ] ;
@@ -1191,8 +1191,9 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA
1191
1191
}
1192
1192
}
1193
1193
1194
- //We've kept amino acids in the digestion motif in the same position in the decoy peptide.
1195
- //Now we will fill the remaining open positions in the decoy with the reverse of amino acids from the target.
1194
+ // We've kept amino acids in the digestion motif in the same position in the decoy peptide.
1195
+ // Now we will fill the remaining open positions in the decoy with the reverse of amino acids from the target.
1196
+ // Part to change to scramble
1196
1197
int fillPosition = 0 ;
1197
1198
int extractPosition = this . BaseSequence . Length - 1 ;
1198
1199
while ( fillPosition < this . BaseSequence . Length && extractPosition >= 0 )
@@ -1250,7 +1251,222 @@ public PeptideWithSetModifications GetReverseDecoyFromTarget(int[] revisedAminoA
1250
1251
}
1251
1252
1252
1253
}
1254
/// <summary>
/// Generates a decoy peptide from this target peptide by randomly scrambling its amino acid sequence.
/// Residues that belong to a cleavage-inducing digestion motif keep their position, and every
/// modification moves together with the residue it is attached to.
/// A scramble is accepted only when its percent identity to the target does not exceed
/// <paramref name="maximumHomology"/>. If none of 10 attempts is accepted, the mirror peptide
/// produced by <see cref="GetPeptideMirror"/> is returned instead.
/// The random number generator is created with a fixed seed, so the same target always
/// produces the same decoy.
/// </summary>
/// <param name="revisedAminoAcidOrder">
/// Array that receives, for each zero-based position of the decoy, the zero-based index of the
/// target residue placed there. Positions up to BaseSequence.Length - 1 are written, so it must be
/// at least that long. In the mirror fallback the array is handed to GetPeptideMirror.
/// </param>
/// <param name="maximumHomology">
/// Highest percent identity (as a fraction, default 0.3) a scrambled sequence may share with the target
/// and still be accepted.
/// </param>
/// <returns>
/// The decoy peptide. Its PairedTargetDecoyHash is set to this peptide's hash code, and this
/// peptide's PairedTargetDecoyHash is set to the decoy's hash code.
/// </returns>
public PeptideWithSetModifications GetScrambledDecoyFromTarget(int[] revisedAminoAcidOrder, double maximumHomology = 0.3)
{
    // '0' never appears in a peptide string, so it marks "open" slots in newBase and
    // "already consumed" residues in evaporatingBase.
    const char placeholder = '0';
    const int maxScrambles = 10;
    // Fixed seed so that the same decoy sequence is always generated from the same target.
    const int scrambleSeed = 56;

    Dictionary<int, Modification> newModificationsDictionary = new Dictionary<int, Modification>();

    // Copy the N-terminal modification (key 1) from the target dictionary to the decoy dictionary.
    if (this.AllModsOneIsNterminus.TryGetValue(1, out Modification nTerminalMod))
    {
        newModificationsDictionary.Add(1, nTerminalMod);
    }

    char[] newBase = new char[this.BaseSequence.Length];
    Array.Fill(newBase, placeholder);
    char[] evaporatingBase = this.BaseSequence.ToCharArray();

    List<DigestionMotif> motifs = this.DigestionParams.Protease.DigestionMotifs;
    if (motifs != null && motifs.Count > 0)
    {
        // Motifs with an empty InducingCleavage are skipped (check the empty "" for topdown).
        foreach (var motif in motifs.Where(m => m.InducingCleavage != ""))
        {
            string cleavingMotif = motif.InducingCleavage;
            List<int> cleavageMotifLocations = new List<int>();

            for (int i = 0; i < BaseSequence.Length; i++)
            {
                (bool fits, bool prevents) = motif.Fits(BaseSequence, i);
                if (fits && !prevents)
                {
                    cleavageMotifLocations.Add(i);
                }
            }

            foreach (int location in cleavageMotifLocations)
            {
                char[] motifArray = BaseSequence.Substring(location, cleavingMotif.Length).ToCharArray();

                for (int i = 0; i < cleavingMotif.Length; i++)
                {
                    int position = location + i;
                    newBase[position] = motifArray[i];
                    revisedAminoAcidOrder[position] = position;

                    // Mods on motif residues are copied directly because those residues do not move.
                    // The indexer is used instead of Add so that overlapping motif matches, which
                    // visit the same position twice, cannot throw on a duplicate key.
                    if (this.AllModsOneIsNterminus.TryGetValue(position + 2, out Modification motifMod))
                    {
                        newModificationsDictionary[position + 2] = motifMod;
                    }

                    // Mark the residue as consumed so the scrambler does not place it again.
                    evaporatingBase[position] = placeholder;
                }
            }
        }
    }

    // We've kept amino acids in the digestion motif in the same position in the decoy peptide.
    // Now we will fill the remaining open positions in the decoy with the scrambled amino acids from the target.
    Random rand = new Random(scrambleSeed);
    double percentIdentity = 1;
    int scrambleAttempt = 0;
    int seqLength = this.BaseSequence.Length;

    while (scrambleAttempt < maxScrambles && percentIdentity > maximumHomology)
    {
        // Each attempt works on private copies so that a rejected attempt leaves no trace.
        Dictionary<int, Modification> tempModificationsDictionary = new Dictionary<int, Modification>(newModificationsDictionary);
        char[] tempNewBase = new char[newBase.Length];
        Array.Copy(newBase, tempNewBase, newBase.Length);

        // residueNums holds the indices of evaporatingBase that have not been drawn yet.
        // An index is removed once drawn so that the same residue is never placed twice.
        List<int> residueNums = Enumerable.Range(0, evaporatingBase.Length).ToList();

        int fillPosition = 0;
        int characterCounter = 0;

        // characterCounter caps the number of draws at the number of indices in residueNums.
        // It is required because fillPosition only advances when a residue is placed: if the
        // sequence ends in motif residues, fillPosition never reaches seqLength and the loop
        // would otherwise keep drawing from an empty list.
        while (fillPosition < seqLength && characterCounter < seqLength)
        {
            int residueNumsIndex = rand.Next(residueNums.Count);
            int extractPosition = residueNums[residueNumsIndex];
            char targetAA = evaporatingBase[extractPosition];
            residueNums.RemoveAt(residueNumsIndex);

            if (targetAA != placeholder)
            {
                // Skip over slots already occupied by motif residues.
                while (tempNewBase[fillPosition] != placeholder)
                {
                    fillPosition++;
                }

                tempNewBase[fillPosition] = targetAA;
                revisedAminoAcidOrder[fillPosition] = extractPosition;

                // Carry the residue's modification along to its new position.
                if (this.AllModsOneIsNterminus.TryGetValue(extractPosition + 2, out Modification movedMod))
                {
                    tempModificationsDictionary.Add(fillPosition + 2, movedMod);
                }

                fillPosition++;
            }

            characterCounter++;
        }

        scrambleAttempt++;

        /*
         * Any homology scoring mechanism can go here, percent identity is probably not the best.
         * In terms of generating a decoy sequence that will have a different mass spectrum than
         * the original, it is far more important to vary the amino acids on the edges than
         * those in the middle. Changes on the edges will offset the entire b and y sequences,
         * leading to an effective decoy spectrum even if there is high identity in the middle of
         * the sequence. Additionally, for peptides with a large amount of a certain amino acid,
         * it will be very difficult to generate a low homology sequence.
         */
        percentIdentity = GetPercentIdentity(tempNewBase, evaporatingBase, tempModificationsDictionary, this.AllModsOneIsNterminus);

        // Accept with "<=" so that acceptance is the exact complement of the loop condition.
        // With a strict "<", an identity exactly equal to the cutoff ended the loop without
        // committing anything and leaked '0' placeholders into the decoy sequence.
        if (percentIdentity <= maximumHomology)
        {
            newBase = tempNewBase;
            newModificationsDictionary = tempModificationsDictionary;
            // Code checking similarity between theoretical spectra could go here
        }
        // If max scrambles are reached, make the new sequence identical to the original to trigger mirroring
        else if (scrambleAttempt == maxScrambles)
        {
            for (int j = 0; j < newBase.Length; j++)
            {
                if (newBase[j] == placeholder)
                {
                    newBase[j] = evaporatingBase[j];
                }
            }
        }
    }

    string newBaseString = new string(newBase);

    // Build the decoy protein by splicing the decoy peptide sequence into the parent protein sequence.
    var aStringBuilder = new StringBuilder(this.Protein.BaseSequence);
    aStringBuilder.Remove(this.OneBasedStartResidueInProtein - 1, this.BaseSequence.Length);
    aStringBuilder.Insert(this.OneBasedStartResidueInProtein - 1, newBaseString);
    string proteinSequence = aStringBuilder.ToString();

    Protein decoyProtein = new Protein(proteinSequence, "DECOY_" + this.Protein.Accession, null, new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), null, null, null, true);
    DigestionParams d = this.DigestionParams;

    // Hash code of the target, taken before PairedTargetDecoyHash is updated below.
    int targetHash = GetHashCode();
    PeptideWithSetModifications decoyPeptide;

    if (newBaseString != this.BaseSequence)
    {
        // The "peptideDescription" argument stores the corresponding target's full sequence.
        decoyPeptide = new PeptideWithSetModifications(decoyProtein, d, this.OneBasedStartResidueInProtein, this.OneBasedEndResidueInProtein, this.CleavageSpecificityForFdrCategory, this.FullSequence, this.MissedCleavages, newModificationsDictionary, this.NumFixedMods, newBaseString);
    }
    else
    {
        // The scramble procedure failed to create a PeptideWithSetModifications with a different
        // sequence. Therefore, we return the mirror image peptide.
        decoyPeptide = this.GetPeptideMirror(revisedAminoAcidOrder);
    }

    // Link the pair: the target stores the decoy's hash code and the decoy stores the target's.
    PairedTargetDecoyHash = decoyPeptide.GetHashCode();
    decoyPeptide.PairedTargetDecoyHash = targetHash;
    return decoyPeptide;
}
1431
+
1432
/// <summary>
/// Computes the fraction of positions at which a scrambled sequence is counted as identical
/// to the unscrambled sequence it was derived from.
/// A position counts as identical when the unscrambled character is the placeholder '0', or
/// when both characters match and the modification check below passes.
/// Modifications are looked up under the key position + 2.
/// </summary>
/// <param name="scrambledSequence">Character array of the scrambled sequence</param>
/// <param name="unscrambledSequence">Character array of the unscrambled sequence; '0' entries always count as identical</param>
/// <param name="scrambledMods">Dictionary containing the scrambled sequence's modifications</param>
/// <param name="unscrambledMods">Dictionary containing the unscrambled sequence's modifications</param>
/// <returns>Number of identical positions divided by the length of <paramref name="scrambledSequence"/></returns>
private static double GetPercentIdentity(char[] scrambledSequence, char[] unscrambledSequence, Dictionary<int, Modification> scrambledMods, Dictionary<int, Modification> unscrambledMods)
{
    int length = scrambledSequence.Length;
    int identicalCount = 0;

    for (int position = 0; position < length; position++)
    {
        bool isPlaceholder = unscrambledSequence[position] == '0';

        // Different residues at a non-placeholder position never count as identical.
        if (scrambledSequence[position] != unscrambledSequence[position] && !isPlaceholder)
        {
            continue;
        }

        int modKey = position + 2;

        // No modification on the scrambled residue, or a placeholder position: counted as identical.
        // NOTE(review): a modification present only on the unscrambled side is not checked here,
        // so such a position still counts as identical - confirm this is intended.
        if (!scrambledMods.TryGetValue(modKey, out Modification scrambledMod) || isPlaceholder)
        {
            identicalCount++;
            continue;
        }

        // Both residues match and the scrambled one is modified: identical only if the
        // unscrambled residue carries the same modification.
        if (unscrambledMods.TryGetValue(modKey, out Modification unscrambledMod) && scrambledMod == unscrambledMod)
        {
            identicalCount++;
        }
    }

    return (double)identicalCount / length;
}
1469
+
1254
1470
//Returns a PeptideWithSetModifications mirror image. Used when reverse decoy sequence is same as target sequence
1255
1471
public PeptideWithSetModifications GetPeptideMirror ( int [ ] revisedOrderNisOne )
1256
1472
{
0 commit comments