-
Notifications
You must be signed in to change notification settings - Fork 23
SB Delete repeats
Search through all sequences and remove any duplicates. On the first pass, any sequences with duplicate IDs are identified; the first instance of the ID is saved and the remaining sequences are deleted. On the second pass, all duplicate sequences are identified, irrespective of their IDs, and again the first one found is retained while the others are removed.
The IDs of all deleted sequences are sent to stderr as a list (unless silenced with -q).
Optional. Restrict the search to only IDs or sequences. Valid options are 'ids', 'seqs', and 'all' (default is 'all').
Optional. Lists of deleted records are sent to stderr. If you want this information organized in multiple columns, specify how many by passing in an integer.
#NEXUS
begin data;
dimensions ntax=16 nchar=50;
format datatype=protein missing=? gap=-;
matrix
'Mle-Panxα12' -m--vidilsgf------------kgitpfkgitlddgwdqinrsfmfvl
'Mle-Panxα9' ----mldilskf------------kgvtpfkgitiddgwdqlnrsfmfvl
'Mle-Panxα10B' -m--rlsekstshdckacitrshnedcarrwgitiddgwdqlnrsfmfgl
'Mle-Panxα7A' -m--gveilfpi----------inratapiksvniddlssqlnrtfmfyl
'Mle-Panxα8' -m--vlevlalf------------prlapfkvitlddvwdqwnrsfmfim
'Mle-Panxα1' -mywifeicqei------------kraqscrkfaidgpfdwtnriimptl
'Mle-Panxα9' ----mldilskf------------kgvtpfkgitiddgwdqlnrsfmfvl
'Mle-Panxα2' -m--vldlisgs----------l-ngflkiksvsiddqwdqinrtylvmf
'Mle-Panxα5' -m--iywvwavf------------krmapfkvvtlddrwdqmnrsfmmpl
'Mle-Panxα4' -m--viellagy------------kglspfkdatvddswdqinrcyvfia
'Mle-Panxα3' ml--llgslgti------------knlsifkdlslddwldqmnrtfmfll
'Mle-Panxα6' -m--lleilanf------------kgatpfkeivlddkwdqinrcymfll
'Mle-Panxα8' ----mldilskf------------kgvtpfkgitiddgwdqlnrsfmfvl
'Mle-Panxα11' -m--lisslvqf------------srlspfkeitiddgwdqlnrsfmfvl
'Mle-Panxα10A' -m--rlsekstshdckacitrshnedcarrwgitiddgwdqlnrsfmfgl
'Mle-Panxα6' ----mldilskf------------kgvtpfkgitiddgwdqlnrsfmfvl
;
end;
$: sb Mle-Panx-C_terms.nex -drp
# ################################################################ #
# Records with duplicate ids deleted
Mle-Panxα9
Mle-Panxα8
Mle-Panxα6
# Records with duplicate sequence deleted
[Mle-Panxα10A, Mle-Panxα10B]
# ################################################################ #
#NEXUS
begin data;
dimensions ntax=12 nchar=50;
format datatype=protein missing=? gap=-;
matrix
'Mle-Panxα12' -m--vidilsgf------------kgitpfkgitlddgwdqinrsfmfvl
'Mle-Panxα7A' -m--gveilfpi----------inratapiksvniddlssqlnrtfmfyl
'Mle-Panxα1' -mywifeicqei------------kraqscrkfaidgpfdwtnriimptl
'Mle-Panxα2' -m--vldlisgs----------l-ngflkiksvsiddqwdqinrtylvmf
'Mle-Panxα5' -m--iywvwavf------------krmapfkvvtlddrwdqmnrsfmmpl
'Mle-Panxα4' -m--viellagy------------kglspfkdatvddswdqinrcyvfia
'Mle-Panxα3' ml--llgslgti------------knlsifkdlslddwldqmnrtfmfll
'Mle-Panxα11' -m--lisslvqf------------srlspfkeitiddgwdqlnrsfmfvl
'Mle-Panxα10A' -m--rlsekstshdckacitrshnedcarrwgitiddgwdqlnrsfmfgl
'Mle-Panxα9' ----mldilskf------------kgvtpfkgitiddgwdqlnrsfmfvl
'Mle-Panxα8' -m--vlevlalf------------prlapfkvitlddvwdqwnrsfmfim
'Mle-Panxα6' -m--lleilanf------------kgatpfkeivlddkwdqinrcymfll
;
end;
$: sb Mle-Panx-C_terms.nex -drp seqs
# ################################################################ #
# Records with duplicate sequence deleted
[Mle-Panxα10A, Mle-Panxα10B]
[Mle-Panxα9, Mle-Panxα9, Mle-Panxα8, Mle-Panxα6]
# ################################################################ #
#NEXUS
begin data;
dimensions ntax=9 nchar=50;
format datatype=protein missing=? gap=-;
matrix
'Mle-Panxα12' -m--vidilsgf------------kgitpfkgitlddgwdqinrsfmfvl
'Mle-Panxα7A' -m--gveilfpi----------inratapiksvniddlssqlnrtfmfyl
'Mle-Panxα1' -mywifeicqei------------kraqscrkfaidgpfdwtnriimptl
'Mle-Panxα2' -m--vldlisgs----------l-ngflkiksvsiddqwdqinrtylvmf
'Mle-Panxα5' -m--iywvwavf------------krmapfkvvtlddrwdqmnrsfmmpl
'Mle-Panxα4' -m--viellagy------------kglspfkdatvddswdqinrcyvfia
'Mle-Panxα3' ml--llgslgti------------knlsifkdlslddwldqmnrtfmfll
'Mle-Panxα11' -m--lisslvqf------------srlspfkeitiddgwdqlnrsfmfvl
'Mle-Panxα10A' -m--rlsekstshdckacitrshnedcarrwgitiddgwdqlnrsfmfgl
;
end;
$: sb Mle-Panx-C_terms.nex -drp ids 2
# ################################################################ #
# Records with duplicate ids deleted
Mle-Panxα9 Mle-Panxα8
Mle-Panxα6
# ################################################################ #
#NEXUS
begin data;
dimensions ntax=12 nchar=50;
format datatype=protein missing=? gap=-;
matrix
'Mle-Panxα12' -m--vidilsgf------------kgitpfkgitlddgwdqinrsfmfvl
'Mle-Panxα7A' -m--gveilfpi----------inratapiksvniddlssqlnrtfmfyl
'Mle-Panxα1' -mywifeicqei------------kraqscrkfaidgpfdwtnriimptl
'Mle-Panxα2' -m--vldlisgs----------l-ngflkiksvsiddqwdqinrtylvmf
'Mle-Panxα5' -m--iywvwavf------------krmapfkvvtlddrwdqmnrsfmmpl
'Mle-Panxα4' -m--viellagy------------kglspfkdatvddswdqinrcyvfia
'Mle-Panxα3' ml--llgslgti------------knlsifkdlslddwldqmnrtfmfll
'Mle-Panxα11' -m--lisslvqf------------srlspfkeitiddgwdqlnrsfmfvl
'Mle-Panxα10A' -m--rlsekstshdckacitrshnedcarrwgitiddgwdqlnrsfmfgl
'Mle-Panxα9' ----mldilskf------------kgvtpfkgitiddgwdqlnrsfmfvl
'Mle-Panxα8' -m--vlevlalf------------prlapfkvitlddvwdqwnrsfmfim
'Mle-Panxα6' -m--lleilanf------------kgatpfkeivlddkwdqinrcymfll
;
end;