31
31
import click
32
32
from PIL import Image
33
33
from importlib import resources
34
+
35
+ from rich import print
36
+ from rich .tree import Tree
37
+ from rich .table import Table
38
+ from rich .console import Group
34
39
from rich .traceback import install
40
+ from rich .logging import RichHandler
41
+ from rich .markdown import Markdown
42
+ from rich .progress import Progress
35
43
36
44
from kraken .lib import log
37
45
@@ -677,29 +685,90 @@ def ocr(ctx, model, pad, reorder, base_dir, no_segmentation, text_direction):
677
685
678
686
@cli.command('show')
@click.pass_context
@click.option('-V', '--metadata-version',
              default='highest',
              type=click.Choice(['v0', 'v1', 'highest']),
              help='Version of metadata to fetch if multiple exist in repository.')
@click.argument('model_id')
def show(ctx, metadata_version, model_id):
    """
    Retrieves model metadata from the repository.
    """
    # NOTE: the docstring above doubles as the click help text, so extended
    # documentation lives in comments.
    #
    # Args:
    #     ctx: click context; used to terminate with a non-zero exit code on
    #          failure.
    #     metadata_version: one of 'v0', 'v1' or 'highest'. 'highest' is
    #          translated to `None` before being handed to `get_description`.
    #     model_id: identifier of the record to look up.
    #
    # Exits with status 1 when the lookup raises `ValueError`, when the record
    # is not marked as a kraken model, or when the record reports a metadata
    # version other than 'v0'/'v1'.
    from htrmopo import get_description
    from htrmopo.util import iso15924_to_name, iso639_3_to_name
    from kraken.lib.util import is_printable, make_printable

    def _render_creators(creators):
        # One line per creator: name, then ORCID and affiliation (each in
        # parentheses) when present.
        o = []
        for creator in creators or ():
            c_text = creator['name']
            if (orcid := creator.get('orcid', None)) is not None:
                c_text += f' ({orcid})'
            if (affiliation := creator.get('affiliation', None)) is not None:
                c_text += f' ({affiliation})'
            o.append(c_text)
        return o

    def _render_metrics(metrics):
        # One 'name: value' line per metric, values with two decimals.
        return [f'{k}: {v:.2f}' for k, v in (metrics or {}).items()]

    def _group(items):
        # Stack the items vertically inside a single table cell.
        # NOTE(review): list-valued record fields are presumably optional and
        # may be None -- confirm against the htrmopo record definitions.
        return Group(*(items or ()))

    if metadata_version == 'highest':
        metadata_version = None

    try:
        desc = get_description(model_id, version=metadata_version)
    except ValueError as e:
        logger.error(e)
        ctx.exit(1)

    # A record counts as a kraken model when it either names kraken as its
    # software or carries the `kraken_pytorch` keyword. `software_name` is
    # read with a default because it may be missing from the record
    # (presumably on v0 metadata -- the v0 table has no software row), so
    # requiring both markers would reject every record lacking the attribute.
    names_kraken = getattr(desc, 'software_name', None) == 'kraken'
    tagged_kraken = 'kraken_pytorch' in (desc.keywords or ())
    if not (names_kraken or tagged_kraken):
        logger.error('Record exists but is not a kraken-compatible model')
        ctx.exit(1)

    if desc.version not in ('v0', 'v1'):
        # Without this guard no table would be built and printing would fail
        # with a NameError.
        logger.error(f'Unsupported metadata version {desc.version}')
        ctx.exit(1)

    # Rows shared by both metadata versions (top of the table).
    rows = [('DOI', desc.doi),
            ('concept DOI', desc.concept_doi),
            ('publication date', desc.publication_date.isoformat()),
            ('model type', _group(desc.model_type))]

    if desc.version == 'v0':
        # Split the alphabet into directly printable characters and
        # non-printable ones, which are shown through `make_printable`.
        chars = []
        combining = []
        for char in sorted(desc.graphemes or ()):
            if not is_printable(char):
                combining.append(make_printable(char))
            else:
                chars.append(char)

        rows += [('script', _group([iso15924_to_name(x) for x in desc.script or ()])),
                 ('alphabet', Group(' '.join(chars), ', '.join(combining))),
                 ('keywords', _group(desc.keywords)),
                 ('metrics', _group(_render_metrics(desc.metrics)))]
        # v0 descriptions are printed as plain text.
        description = desc.description
    else:
        rows += [('language', _group([iso639_3_to_name(x) for x in desc.language or ()])),
                 ('script', _group([iso15924_to_name(x) for x in desc.script or ()])),
                 ('keywords', _group(desc.keywords)),
                 ('datasets', _group(desc.datasets)),
                 ('metrics', _group(_render_metrics(desc.metrics))),
                 ('base model', _group(desc.base_model)),
                 ('software', desc.software_name),
                 ('software_hints', _group(desc.software_hints))]
        # v1 descriptions are rendered as Markdown.
        description = Markdown(desc.description or '')

    # Rows shared by both metadata versions (bottom of the table).
    rows += [('license', desc.license),
             ('creators', _group(_render_creators(desc.creators))),
             ('description', description)]

    table = Table(title=desc.summary, show_header=False)
    table.add_column('key', justify="left", no_wrap=True)
    table.add_column('value', justify="left", no_wrap=False)
    for key, value in rows:
        table.add_row(key, value)

    print(table)
703
772
704
773
705
774
@cli .command ('list' )
@@ -708,14 +777,41 @@ def list_models(ctx):
708
777
"""
709
778
Lists models in the repository.
710
779
"""
711
- from kraken import repo
780
+ from htrmopo import get_listing
781
+ from collections import defaultdict
712
782
from kraken .lib .progress import KrakenProgressBar
713
783
714
784
with KrakenProgressBar () as progress :
715
785
download_task = progress .add_task ('Retrieving model list' , total = 0 , visible = True if not ctx .meta ['verbose' ] else False )
716
- model_list = repo .get_listing (lambda total , advance : progress .update (download_task , total = total , advance = advance ))
717
- for id , metadata in model_list .items ():
718
- message ('{} ({}) - {}' .format (id , ', ' .join (metadata ['type' ]), metadata ['summary' ]))
786
+ repository = get_listing (lambda total , advance : progress .update (download_task , total = total , advance = advance ))
787
+ # aggregate models under their concept DOI
788
+ concepts = defaultdict (list )
789
+ for item in repository .values ():
790
+ # both got the same DOI information
791
+ record = item ['v0' ] if item ['v0' ] else item ['v1' ]
792
+ concepts [record .concept_doi ].append (record .doi )
793
+
794
+ table = Table (show_header = True )
795
+ table .add_column ('DOI' , justify = "left" , no_wrap = True )
796
+ table .add_column ('summary' , justify = "left" , no_wrap = False )
797
+ table .add_column ('model type' , justify = "left" , no_wrap = False )
798
+ table .add_column ('keywords' , justify = "left" , no_wrap = False )
799
+
800
+ for k , v in concepts .items ():
801
+ records = [repository [x ]['v1' ] if 'v1' in repository [x ] else repository [x ]['v0' ] for x in v ]
802
+ records = filter (lambda record : getattr (record , 'software_name' , None ) != 'kraken' or 'kraken_pytorch' not in record .keywords , records )
803
+ records = sorted (records , key = lambda x : x .publication_date , reverse = True )
804
+ if not len (records ):
805
+ continue
806
+
807
+ t = Tree (k )
808
+ [t .add (x .doi ) for x in records ]
809
+ table .add_row (t ,
810
+ Group (* ['' ] + [x .summary for x in records ]),
811
+ Group (* ['' ] + ['; ' .join (x .model_type ) for x in records ]),
812
+ Group (* ['' ] + ['; ' .join (x .keywords ) for x in records ]))
813
+
814
+ print (table )
719
815
ctx .exit (0 )
720
816
721
817
0 commit comments