From 7f3dd13d03f073846f10709dab953e92c584285a Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 17 Sep 2022 03:03:40 +0200 Subject: [PATCH] Add NWB support III - provenance writing (#17) * added tests for provenance writting * MyPy compliance --- sleap_io/io/nwb.py | 79 ++++++++++++++---- ...ictions_1.2.7_provenance_and_tracking.slp} | Bin 102157 -> 104237 bytes tests/fixtures/slp.py | 2 +- tests/io/test_nwb.py | 33 ++++++++ 4 files changed, 95 insertions(+), 19 deletions(-) rename tests/data/slp/{tutorial_predictions_version_1.2.7_with_provenance.slp => predictions_1.2.7_provenance_and_tracking.slp} (51%) diff --git a/sleap_io/io/nwb.py b/sleap_io/io/nwb.py index cddfd848..30768535 100644 --- a/sleap_io/io/nwb.py +++ b/sleap_io/io/nwb.py @@ -109,6 +109,21 @@ def write_labels_to_nwb( https://pynwb.readthedocs.io/en/stable/pynwb.file.html#pynwb.file.NWBFile Defaults to None and default values are used to generate the nwb file. + + pose_estimation_metadata (dict): This argument has a dual purpose: + + 1) It can be used to pass time information about the video which is + necessary for synchronizing frames in pose estimation tracking to other + modalities. Either the video timestamps can be passed + with the key `video_timestamps`, + or the sampling rate with the key `video_sample_rate`. + + e.g. pose_estimation_metadata["video_timestamps"] = np.array(timestamps) + or pose_estimation_metadata["video_sample_rate"] = 15 # In Hz + + 2) The other use of this dictionary is to overwrite sleap-io default + arguments for the PoseEstimation container. + see https://github.com/rly/ndx-pose for a full list of arguments. 
""" nwb_file_kwargs = nwb_file_kwargs or dict() @@ -129,7 +144,6 @@ def write_labels_to_nwb( ) nwbfile = NWBFile(**nwb_file_kwargs) - nwbfile = append_labels_data_to_nwb(labels, nwbfile, pose_estimation_metadata) with NWBHDF5IO(str(nwbfile_path), "w") as io: @@ -145,12 +159,33 @@ def append_labels_data_to_nwb( labels (Labels): A general labels object nwbfile (NWBFile): And in-memory nwbfile where the data is to be appended. + pose_estimation_metadata (dict): This argument has a dual purpose: + + 1) It can be used to pass time information about the video which is + necessary for synchronizing frames in pose estimation tracking to other + modalities. Either the video timestamps can be passed + with the key `video_timestamps`, + or the sampling rate with the key `video_sample_rate`. + + e.g. pose_estimation_metadata["video_timestamps"] = np.array(timestamps) + or pose_estimation_metadata["video_sample_rate"] = 15 # In Hz + + 2) The other use of this dictionary is to overwrite sleap-io default + arguments for the PoseEstimation container. + see https://github.com/rly/ndx-pose for a full list of arguments. + Returns: NWBFile: An in-memory nwbfile with the data from the labels object appended. 
""" pose_estimation_metadata = pose_estimation_metadata or dict() + # Extract default metadata + provenance = labels.provenance + default_metadata = dict(scorer=str(provenance)) + sleap_version = provenance.get("sleap_version", None) + default_metadata["source_software_version"] = sleap_version + labels_data_df = _extract_predicted_instances_data(labels) # For every video create a processing module @@ -162,20 +197,27 @@ def append_labels_data_to_nwb( processing_module_name, nwbfile ) + # Propagate video metadata + default_metadata["original_videos"] = [f"{video.filename}"] # type: ignore + default_metadata["labeled_videos"] = [f"{video.filename}"] # type: ignore + + # Overwrite default with the user provided metadata + default_metadata.update(pose_estimation_metadata) + + # For every track in that video create a PoseEstimation container name_of_tracks_in_video = ( labels_data_df[video.filename] .columns.get_level_values("track_name") .unique() ) - # For every track in that video create a PoseEstimation container for track_index, track_name in enumerate(name_of_tracks_in_video): pose_estimation_container = build_pose_estimation_container_for_track( labels_data_df, labels, track_name, video, - pose_estimation_metadata, + default_metadata, ) nwb_processing_module.add(pose_estimation_container) @@ -260,7 +302,6 @@ def build_pose_estimation_container_for_track( ) # Arrange and mix metadata - pose_estimation_container_kwargs = dict( name=f"track={track_name}", description=f"Estimated positions of {skeleton.name} in video {video_path.name}", @@ -268,11 +309,7 @@ def build_pose_estimation_container_for_track( nodes=skeleton.node_names, edges=np.array(skeleton.edge_inds).astype("uint64"), source_software="SLEAP", - original_videos=[f"{video.filename}"], - labeled_videos=[f"{video.filename}"], # dimensions=np.array([[video.backend.height, video.backend.width]]), - # scorer=str(labels.provenance), - # source_software_version=f"{sleap.__version__} ) 
pose_estimation_container_kwargs.update(**pose_estimation_metadata) @@ -302,28 +339,32 @@ def build_track_pose_estimation_list( pose_estimation_series_list: List[PoseEstimationSeries] = [] for node_name in name_of_nodes_in_track: - # Add predicted instances only - data_for_node = track_data_df[ - node_name, - ] + # Drop data with missing values + data_for_node = track_data_df[node_name].dropna(axis="index", how="any") - data_for_node_cleaned = data_for_node.dropna(axis="index", how="any") - node_trajectory = data_for_node_cleaned[["x", "y"]].to_numpy() - confidence = data_for_node_cleaned["score"].to_numpy() + node_trajectory = data_for_node[["x", "y"]].to_numpy() + confidence = data_for_node["score"].to_numpy() + + reference_frame = ( + "The coordinates are in (x, y) relative to the top-left of the image. " + "Coordinates refer to the midpoint of the pixel. " + "That is, t the midpoint of the top-left pixel is at (0, 0), whereas " + "the top-left corner of that same pixel is at (-0.5, -0.5)." 
+ ) pose_estimation_kwargs = dict( name=f"{node_name}", description=f"Sequential trajectory of {node_name}.", data=node_trajectory, unit="pixels", - reference_frame="No reference.", + reference_frame=reference_frame, confidence=confidence, confidence_definition="Point-wise confidence scores.", ) - # Add timestamps or rate if timestamps are uniform - frames = data_for_node_cleaned.index.values + # Add timestamps or only rate if the timestamps are uniform + frames = data_for_node.index.values timestamps_for_data = timestamps[frames] sample_periods = np.diff(timestamps_for_data) if sample_periods.size == 0: @@ -334,6 +375,8 @@ def build_track_pose_estimation_list( rate = 1 / sample_periods[0] if uniform_samples else None if rate: + # Video sample rates are ints but nwb expect floats + rate = float(int(rate)) pose_estimation_kwargs.update(rate=rate) else: pose_estimation_kwargs.update(timestamps=timestamps_for_data) diff --git a/tests/data/slp/tutorial_predictions_version_1.2.7_with_provenance.slp b/tests/data/slp/predictions_1.2.7_provenance_and_tracking.slp similarity index 51% rename from tests/data/slp/tutorial_predictions_version_1.2.7_with_provenance.slp rename to tests/data/slp/predictions_1.2.7_provenance_and_tracking.slp index 611015e779bceecd364bd2382fdca1bfb23aef07..00c31bd21ed9e0b8df71f3d7a8b626fb3505a5f7 100644 GIT binary patch delta 7127 zcmZ`-30PD|wx+t-L=ll)fyP$Dq6iIwEZtp3L?X(rh!}R7O`s8w#YRD2bj+wIUbSLS z;})H0B*Bhij0whhI-_~0(JYQWXEaXC@~L6In3(M)ukO9)kZ!)%eBU{zPW|VvQ&p$x z-pi&v_Q!hcw*)CdS#QV=g(7@nS8u*KKzUW6^l2wuy?(+Ndq-t@l4Qo%hbw;2boKf> z<=Du-mm+yfc+=id5ug^W8}_G0+?go%c?&xo5=f_T+aZK>vaS~5uGCgf5LAw@kMn48?}}Mq}7)V{>h_v8c2`nBZD4Zb|r5bz^-| z$8m{jXKXvM&$5#?*O6@-gg*JocVbE&VxR34hpWEt;Zh6$M&gpmHSbOIPvF< z`FL*>lQp-GizMR*oo#s+v9Ysp>@2?OKzkJd!+4MSEeN^=C&~Twk6(pSUb9c6IYBZ9U_*+4zOU`6} zcg}R#Z@RF}U*z=ij}&f8KAmd15Tp`)TE9r4P9@^%e7%J|sxWS?dqR|LPgOBSQFfnWfwj?JW+*6DSRLSHcAnrzgrfn@LU%xgIYj-OoxQrO z+SsfXngZ1%Lf93kvTdwiZV>tdCo03I#R&HU17(M6&;$}LLb4Y{|u 
zP?c1&X`!0-y*hEzZET_^$U2E{zvb_fe7S}Fomf%;8r5Ol|FIG}K+W1`-S2B>)!%N@ za?To1Vs8bo>A7d!f5gGM|KGN9#+cxL9IgAuJ!3s1>>nq=%&VrQj57OaGl}_wI4+F3 zEl%c)x!n3KDT|m7CLd6kcc$`YQVJJ$v$)utDT#v$^V)1~MK0vHki{9q{OKazH05%F zyfSW$G;r}oImf-alv`O0Z59_$-HgeI`zy+=6;$9<S9k8rwOa*P6Dm z=FhkD4q+P?GuZb^bNeoCg?4kuwcU}<=0&gbrrl94e(vGEBgc4H%A0)1gtxfwKgYTF z;v^T_PjIo}Z7yO@adG=JAE^HVkElJ*BV7O9}3An z9&fgO#+&}1f9hgR`kptxd(6e=pSjTeB#G(HeSdW%nWS%-1IZ$No=)U|qHpAA(ew5w zQNA%olzA?q%yJVY87s;s9wPpbJINq@-QJ>Su9xU}C1SN)eI?(Blnt)8!kD zXDaTeG>(?eUqlg708sDBmmJ1pO1Sl6+j5Nif3If+={S*1=~#%D(@CgYx(*q9Yn&1X z9hfQuwsiTb*X2S9-SIntlJF&&CaIi$MBMaD- z5udMSv$KYstewV6wvs!g^t4Vducc~uR)v91*flK2+0~3 z;Q%LweD}Fe-cD5x@wN5J)sAE|2dbDA=3sg34}pcZSuV zBl>PC5^;gm!xotJ@K~$!us$cBc0y=3=>Gj1osy7ZA~)o77nBS6DQ~W+W2L#nc4TTv zBiqfLa)n>dCGLKEJog9AL!xJf=VH4A=a#qxm@^7`OX*jT#`mCOv~@HUSptW#;N>yU zQ%EsDDZZNXpU*eH>DiSfc)-3~LVWFfZq29Z?CKKOJ{J5uKZz;j&r$a5^SBYk+~-%r zFPu!7$A$!hAu6#znD9LA2(Ob1N!*vPF*1@EP&~HAG9l@*!K`HF9sWR?85@$VO6Z%{oxc5KLOIG%%CrZJcx!H z(z52C+AxhG!~vf7V-sBTB?yfGLsXWl6f`G^Vt_jYj?UhsFSQJm#?U+-Q$i6k0iaX6 zXX_0JjRZqfQrTQC#FBR@!qS*oguc~7zT-o_Z-;!7iV5c5(^}cBQmn{}bT=qC$3O%x#%pwa!4?M(>PBUGbksiX**56~CRJf~~3 zgn}(^=R*UZ*V&$5;#MMr4zV3BnM)S~OBv{ozE9AvvV>Z(6_oL3Lf6k4Eo(2st7vZ- zakon^xU2499o&>33Llo?5|1tCJF~$3*Z4Ka`Ycj%EtOnCIe@jrCsxtJkjY!1-z!tc zynwN@hQ(sBEc9z;XUnlz9+mS+?_QBf*I`@&H>B0}5$-qA%Rqe#bZTm+?oVuW1!iAZ z!Kc~&gMqS+3Y_LX-kUtO--Eu0oQY4##c-^Feh177_?msSEsOryGElB8w@Oa>zs=rs z1NO~%3Ta?e19bs1U(g4at+-1i#5`q8QK&fw$TQbpi$^b3;n7YGQU1MaT;Py&jxSw^>4Q7|Yu<^+XHB zfG!BUU^MEtV<5gBh;6A}S1pyElshZ|@+$Q&m7bK0xmIH>4KC%iWF9c+*_djqB@ORQ zP8czu+ldq@V2H{xIa5dzJr7A;1Kp^7OaHQEpgi^gk<_#Bimn^`@Qq|R>7$eu`Y#}R z0Q#2a7>eJML=2QkJ!&}V`jjcU-AI}S#&FWvKdhiX0+~Jhk7ma1f9m&G2Fj#=s)1|U z=TKG!bpvn!=-jKH>G!g|eBeozD^Y1!Nf8nX(CqcudgfY-^%zpikNvsp?Q}nmnZXTd z%*id6Drg*pC4o+#R80>cRD6+=p!uam6%-*002-V#iXOCtf^D@~gC(me1{6Wy`MaL_ zS1~XP$H*MKVx$PE1?Z0BuG&ME&_5buRrCsq0VW9i$)t)s%=Q`aZDoH;9mT%&fLVA# zr)PVO!`@cD9ot9|u^o`x@73sD!!#CR#`(um>L^0?1N6BBv;K%B^p7&$WT>GS@CF2` 
z=2hyCVxaiODXo0_FS$mFkTU?yG+oraZVCOPF>EucDF$4Gz{Bh%-7ySYgi%~uR@IlO z)f59h@rK9;zqQjI%SgFwy_WGa{W!6jvan@vl6|sfx6*qMHVFE&On>d0ES#gQ3V6#U zBSpyn0d!Qp5Xbf*8#~8i{1V>F&>qLp7Z1yueY*_bVy4#PBetd<-(vRE^Aiw%dDD!O zNU($(Qu;pq52~nsgzTROT4#51LLbvK5O=Hho68?9VdER{{pRd#BU^wiP4yfDH9&Yh z=+j>o=}zN_d0>c2x-Q>o;Q!ERZgtYVgAjZ>M+iQ}b@NTM9XL0F{`GF4{)}ayd`NFz zDx(P53DAYhy!G!Qv;d6ZW3)yzQw%r+fsyv_>ff^rl*aHDu2)fnm;pMo=BS?aHxdu4 zuW-``B@_{70Xa|?6?2xIZG=;)G!1kx-{r8VT9+Hk=l~#Zg1!{kqdkXd6g?&XpJz6+ zPEKR>w(0+oiGBerk3pYQ9@G6dvK4bfnv^nnedCTM$bv;T@qZDXt~;Q=fI$5C5Zjgq z<$Pit9qlBe#)JO2_!<4**!!GAew+Raw`{w%Z_~50O?W5&qlrKMcC9Vc_x;#JHW0e&%VOP delta 6598 zcmZvg30zcF7r^HYvWOul7$`0<#3mRsZ0=oCahDJT1cL!)1{{PLgkc?)(X_PElvmx% zGF;MMivo={<&s!hpY?Oemd`YkzJ6t2TH)7bi}l@i-#u^c_!EA=^Ugcxf6hJk-23jE znSJ|xHg5OX9H$O=qIdk$YPBkSOK-6=!t0#c>-Pp>OYayl$j8r1i>NxuN2|UNzomDq z|1y>Q*OWl+#2z0%^}t9;yWsQ2;GS^V7bkA@oh5XMmwm?zUAWgFgm{^6juprGy(DyP z=?xY8{lbJU*Qy1=RbR0`C``;roFT>xo97e;m-vbin#U>KtyvAavs;rQgsX-QBIyN{ zx5|(FSJqiA(rCegN_9!R6}3}H5qo08#6$N~63rEzDi%fut94p&MQn<=Gde_4=f$Lo z*JSHvqNc`B;6hA_XpadA&}kDjw$j=PlT9-vUDQR#izj5jLXr)Mr5XMqYL2W%#HNbR z#nKEnW5a_|QrZ&j8naO|h1izGMP;Qc_sNQ;18pKGX`oC7MPamU zMQufi8jICvs?$taE?$YZiC@I~JCDbW5u7>W=XnpZ)K^uBJCZc9l0v3ZTWVNowp83( zQ)|*2h)Hj=>S2DAy_ysywrB&zFzskLc#<|!RtmLKC*GE&y3}S^p_y`V(kQPpOPp2q8ItY(Xi)PEabuCCD68A_{6EJva)Tyq;@Ehj0O^i`$?Qx!`f z8g=#Jmp)^0)?C5wwd5xSY2P?;$^k*0H9=gQHjeHiYnraZktT1<^ho(SmuwpT@AW+S z4*Z+cyjP{V-stK7#LLsK+3M*Z?d{?3P%S&;2^f4l0M({Tp8l78J^d&C^7P;E^Ys5Q z#&bsCA4M_e_VmSfIj7Ig7M#~+67fXFW`F0ZB{_mKB5R4@d?$Oi+WBT4aa80I(OpPx ziwcM)JV?Zg#e`d0M7a5-xfoYON60-A|9 z+(<;Pod|sk5ngR1_UrXT6CWX&Cp~H(?)2VHZk^8&k^jQ4Ud|VG6G!GQGUSaHQv;lh z2Z$ruNkq7dh*JlN*Yd_LwbO8f+zvZTgzqsT4jv`qmE%Md^bv9EZ8Ff6Gep~XnnWa@ zBex-+lcdKl5}WG_Vk^H)ZU^@h$F*zZ_Sx^q?ey<21Uf7JCdi0e*@CM`E$mjih6n=D zyS;?}3~_xGB;*LL&Ol*_;7S`Vc@yrIyq|?g@@$wS+d?H-7cR-jQIZTDE6J0irPw18 zl3qDZ$`GKDGI+&Emfm|LnHwuy_jQR{Nxwfy;=j>J%;#wmzadq!cTAGxgo%=jnkvZ; zCrk3Z>5{CUCdrjEBsmgE8Q!{I$nXpBnyIVLYX%<-PM$@K&(k4}ivl 
zmO`%Siq@cL-AjeAq0*)uLU%4X@4fMkJRvwn8f4oit>_76e~w+H%Uw(td?;UF_Tjf_vEwcX)`+y z8POz`K7$)(B<{^;&7|cl=rAJiXLFo4RTmVZ;M~m`t$1*6r zcNuK0?-VVn0Q!y!pzjYB0QZ!lol!L3Lh#N2jgHkary4s7`7Qp8qZPji7JLn8e9v;0 zfyQmMnz3;?AD8^~YWzG{Xn$0D>`232I!WBiCjnMrgnPOCpIwIoA7uGxbW_1nG>t{g z=b|S-^xf|0qx&-OYKW%w4Vz)dhdeBOtu^>3u;Az-=tr@lVMRNqXoHJ^o2h7aMLPf* z-8a)ZSD_26(yzISC*u!`*}kE*we}X_33^yxy>*M$tZ0H2)It&MJX5*O$PWw<_L) ziuQw|#g;;B323ylnbt;RV=w#F+)Fz5UyO^n{gMa_k%C^>3&85@<( zHPK+hYs>h3a&b)!J_;86V;MUw7DAdBIZd1aiZ2C?7Jqtw3HmP^{#UMyOo)%A@$|IQ z_Zkr}Ui6%jA^f8OI(U~+2&12CvhaX`EfUTkM)_DPH3Ha9Z1h;mhJoVcLS05EbO4a0 zfQ*-scoQIHCVCFBPi871K?b%FGnq?1zF~qIJ~lxO116{;pq%BQorvCBiq^4Nb#k*S zgwZF^Ky(y5rO}Ej>9)9*o0{jZ#;G{0{0&rRt81rXc3g%c1(+a z@#Q|Q8h}l9vp)wJ5HKMeYrF#+H)k#aCX!=Kz^2HJ_}>ax=maxpMn!vG(LPnQz)Ile zDcWk#=$wvTFF_mFm``(KTVU*obZolJ-~VmLe^kQBlDLxX?HU}k684p8B@DQoxap3r zg-ny>S^Qf;f~o+XuV`zkSXcvuO_jqA142tV6}}GtS;gI;35Xahqz0b|6o6HE9MI77e$>L!aHkCGTAa|0zhWV&~1($P4 z+C5qP5J^I}C4RvI59$$Oqf6z$T^8v4FBWL#y;e5v8pw6O+`n9@!)w?84+A=jqV)L`zpf zNz%*fP&^Ax=aOvlWN~mUOfjhzrnrdM=(=sj+FIzvItAPTAYGmx3mZ@o8=#UKs1pX7 z?=C7~YCQs`kz++*7r5EXgG~sSwH*889oVS61_aC&j(rN)g)}eTSx4VHPv2&~M9k#9 zgR#yAvkkF9TeLQ4%VJ`qCk6XOKqV}8muny2fKIZZ&vPX_3uDPFJ_bp)fg{tMT|KMhc}VhrJIU;uE$DX^?8E(*u-%izyXx6fMjr>WsSO1)I)aPp z!hQ9uDfD%$Kk$4 z)))G;>};w*;C_n3;9IMZ>n?MnsTu*ZlVe|oGC6kN;u-`@H^;sLY@R#spe0oZm?Iqf z>K)j>3atp3QyjYw*nD^1Z!@b9FrRYleoq#sG{dv?Kr`zNZT05&t>`NjH^9aB;H+l8 z)j6}95%_O$96k=|@E2qV`l!AJV;9gmacB#?eXkyP%B$s=|&I&pV zj$(JeFTP)nI#}@IT#_SqV5cv#BVe|2>``F#?!4khl?a$U9Q&3hOOJ!*I(6qd#XfqSGwmA{Y5K6jMqIr=1VHunO<@h)!Q` zLswbU&s_8gh%R$SkNwz=BiiA!Lv}ms{JVe}C`z}&$&4B_)L(Xlv#WIX9{#eOO$VPK t3FOZX{q1lv-U6FzWQXvWpnj