22
33import os
44import copy
5+ import json
56from collections import defaultdict
67import numpy as np
78
1112from pandas import compat , isnull
1213from pandas import Series , DataFrame , to_datetime
1314from pandas .io .common import get_filepath_or_buffer , _get_handle
15+ from pandas .core import config
1416from pandas .core .common import AbstractMethodError
1517from pandas .formats .printing import pprint_thing
16-
18+ from pandas .types .common import (
19+ is_integer_dtype , is_timedelta64_dtype , is_string_dtype , is_numeric_dtype ,
20+ is_bool_dtype , is_datetime64_dtype
21+ )
1722loads = _json .loads
1823dumps = _json .dumps
1924
@@ -61,6 +66,22 @@ def __init__(self, obj, orient, date_format, double_precision,
6166 if orient is None :
6267 orient = self ._default_orient
6368
69+ self .is_jsontable_schema = orient == 'jsontable_schema'
70+ if self .is_jsontable_schema :
71+ self .schema = to_json_schema (obj )
72+
73+ # XXX: Do this timedelta properly in to_json
74+ sample = obj .head (
75+ config .get_option ('display.max_rows' )).reset_index ()
76+ timedeltas = sample .select_dtypes (include = ['timedelta' ]).columns
77+ sample [timedeltas ] = sample [timedeltas ].applymap (isoformat )
78+ self .obj = sample
79+ date_format = 'iso' # ignoring user input, but epoch not allowed
80+ orient = 'records'
81+
82+ else :
83+ self .schema = None
84+
6485 self .orient = orient
6586 self .date_format = date_format
6687 self .double_precision = double_precision
@@ -75,14 +96,19 @@ def _format_axes(self):
7596 raise AbstractMethodError (self )
7697
def write(self):
    """
    Serialize ``self.obj`` to JSON text.

    The object is encoded by the module-level ``dumps`` using the options
    stored on this writer.  If ``self.is_jsontable_schema`` is set, the
    encoded data is wrapped together with ``self.schema`` in an envelope
    of the form ``{"schema": ..., "data": ...}``.

    Returns
    -------
    The serialized JSON text.
    """
    encoder_options = dict(
        orient=self.orient,
        double_precision=self.double_precision,
        ensure_ascii=self.ensure_ascii,
        date_unit=self.date_unit,
        iso_dates=self.date_format == 'iso',
        default_handler=self.default_handler,
    )
    payload = dumps(self.obj, **encoder_options)

    if not self.is_jsontable_schema:
        return payload

    # The schema is a plain dict, so the standard library encoder is used
    # for it; the data part has already been encoded above.
    schema_text = json.dumps(self.schema)
    return '{{"schema": {}, "data": {}}}'.format(schema_text, payload)
86112
87113
88114class SeriesWriter (Writer ):
@@ -884,10 +910,6 @@ def _recursive_extract(data, path, seen_meta, level=0):
884910
885911 return result
886912
887- # ---------------------------------------------------------------------
888- # JSON-Table Schema routines
889- # http://specs.frictionlessdata.io/json-table-schema/
890-
891913
892914# TODO: Make method on Timedelta?
893915def isoformat (x ):
@@ -909,3 +931,134 @@ def isoformat(x):
909931 tpl = 'Pn{td.days}Tn{td.hours}n{td.minutes}n{seconds}' .format (
910932 td = components , seconds = seconds )
911933 return tpl
934+
935+ # ---------------------------------------------------------------------
936+ # JSON-Table Schema routines
937+ # http://specs.frictionlessdata.io/json-table-schema/
938+
939+
def as_jsontable_type(x):
    """
    Map a NumPy / pandas type to the name of its jsontable type.

    The dtype predicates are tried in the order shown below and the first
    one that matches decides the result.

    =============== ======================
    Pandas type     JSON Table Schema type
    =============== ======================
    int64           integer
    bool            boolean
    float64         number
    datetime64[ns]  date
    timedelta64[ns] duration
    =============== ======================

    Types accepted by ``is_string_dtype`` are reported as ``'string'``;
    anything that matches none of the predicates is reported as ``'any'``.

    Parameters
    ----------
    x : NumPy / pandas type
        Passed unchanged to each ``is_*_dtype`` predicate.

    Returns
    -------
    str
    """
    # Keep this ordering: the boolean check must run before the generic
    # numeric check (NOTE(review): pandas appears to treat bool as numeric,
    # so swapping them would presumably turn 'boolean' into 'number').
    ordered_checks = (
        (is_integer_dtype, 'integer'),
        (is_bool_dtype, 'boolean'),
        (is_numeric_dtype, 'number'),
        (is_datetime64_dtype, 'date'),
        (is_timedelta64_dtype, 'duration'),
        (is_string_dtype, 'string'),
    )
    for matches, type_name in ordered_checks:
        if matches(x):
            return type_name
    return 'any'
968+
969+
970+ def _set_default_names (data ):
971+ """Sets index names to 'index' for regular, or 'level_x' for Multi"""
972+ if all (name is not None for name in data .index .names ):
973+ return data
974+
975+ data = data .copy ()
976+ if data .index .nlevels > 1 :
977+ names = [name if name is not None else 'level_{}' .format (i )
978+ for i , name in enumerate (data .index .names )]
979+ data .index .names = names
980+ else :
981+ data .index .name = 'index'
982+ return data
983+
984+
def to_json_schema(data, index=True, primary_key=None):
    """
    Create a JSON Table schema from ``data``.

    Parameters
    ----------
    data : Series, DataFrame
    index : bool
        Whether to include ``data.index`` in the schema.  Any truthy value
        is treated as ``True``.
    primary_key : str, list of str, or None
        Column names to designate as the primary key; when given, the
        value is stored in the schema unchanged.
        The default `None` will set `'primary_key'` to the index
        level or levels if the index is included and unique.

    Returns
    -------
    schema : dict
        Always has a ``'fields'`` list of ``{'name': ..., 'type': ...}``
        entries; ``'primary_key'`` is present only when one was given or
        could be taken from a unique index.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {'A': [1, 2, 3],
    ...      'B': ['a', 'b', 'c'],
    ...      'C': pd.date_range('2016-01-01', freq='d', periods=3),
    ...     }, index=pd.Index(range(3), name='idx'))
    >>> pd.to_json_schema(df)
    {'fields': [{'name': 'idx', 'type': 'integer'},
                {'name': 'A', 'type': 'integer'},
                {'name': 'B', 'type': 'string'},
                {'name': 'C', 'type': 'date'}],
     'primary_key': 'idx'}

    Notes
    -----
    See `as_jsontable_type` for conversion types.
    Timedeltas are converted to ISO8601 duration format with
    9 decimal places after the seconds field for nanosecond precision.
    """
    fields = []

    if index:
        # Unnamed index levels would otherwise produce fields (and a
        # primary key) named ``None``; do this for every truthy ``index``,
        # not only for the literal ``True``.
        data = _set_default_names(data)

        if data.index.nlevels > 1:
            for level in data.index.levels:
                fields.append({'name': level.name,
                               'type': as_jsontable_type(level.dtype)})
        else:
            fields.append({'name': data.index.name,
                           'type': as_jsontable_type(data.index.dtype)})

    if data.ndim > 1:
        # Pair each column label with its dtype without relying on
        # ``Series.iteritems`` / ``Series.items``, whose availability
        # differs between pandas and Python versions.
        dtypes = data.dtypes
        for column, type_ in zip(dtypes.index, dtypes):
            fields.append({'name': column,
                           'type': as_jsontable_type(type_)})
    else:
        fields.append({
            'name': data.name if data.name is not None else 'values',
            'type': as_jsontable_type(data.dtype)})

    schema = {'fields': fields}

    if primary_key is not None:
        schema['primary_key'] = primary_key
    elif index and data.index.is_unique:
        # TODO: Always a list, spec allows for a string scalar.
        if data.index.nlevels == 1:
            schema['primary_key'] = data.index.name
        else:
            # Plain list rather than the index's own names container, so
            # the schema holds only built-in types.
            schema['primary_key'] = list(data.index.names)
    return schema
1057+
1058+
def publish_tableschema(data):
    """
    Temporary helper for testing w/ frontend.

    Serializes ``data`` with ``to_json(orient='jsontable_schema')`` and
    hands the result to IPython's ``display`` as a raw bundle keyed by the
    tableschema MIME type.
    """
    # Imported lazily so IPython is only needed when this helper is called.
    from IPython.display import display

    bundle = {
        'application/vnd.tableschema.v1+json':
            data.to_json(orient='jsontable_schema'),
    }
    display(bundle, raw=True)
0 commit comments