Merge pull request #45 from sdss/ipl3-astra

add sdss_id_groups
sdss · Oct 10, 2023 · 4d5a3d1 · 4d5a3d1
2 parents 022e942 + a3feba4
commit 4d5a3d1
Showing 1 changed file with 25 additions and 0 deletions.
diff --git a/python/sdss_access/path/path.py b/python/sdss_access/path/path.py
@@ -345,6 +345,8 @@ def extract(self, name, example):
             template = re.sub('@component_default[|]', '{component_default}', template)
         if re.search('@cat_id_groups[|]', template):
             template = re.sub('@cat_id_groups[|]', '{cat_id_groups}', template)
+        if re.search('@sdss_id_groups[|]', template):
+            template = re.sub('@sdss_id_groups[|]', '{sdss_id_groups}', template)
 
         # check if template has any brackets
         haskwargs = re.search('[{}]', template)
@@ -1257,6 +1259,29 @@ def cat_id_groups(self, filetype, **kwargs):
             cat_id = int(kwargs['cat_id'])
         return f"{(cat_id // k) % k:0>2.0f}/{cat_id % k:0>2.0f}"
 
+    def sdss_id_groups(self, filetype, **kwargs):
+        '''
+        Return a two-level folder path grouping data by SDSS identifier, so
+        that no single folder holds too many files.
+
+        Parameters
+        ----------
+        filetype : str
+            File type parameter. Not used by this method.
+        sdss_id : int or str
+            SDSS-V identifier; required keyword, converted with ``int``.
+
+        Returns
+        -------
+        sdss_id_groups : str
+            ``XX/YY``: XX = (sdss_id // 100) % 100, YY = sdss_id % 100, 0-padded.
+        '''
+        # k = 100 gives 100 x 100 leaf folders: even 10 M sources is ~1,000 files each
+        k = 100
+        sdss_id = int(kwargs["sdss_id"])
+        return f"{(sdss_id // k) % k:0>2.0f}/{sdss_id % k:0>2.0f}"
+
+
     def component_default(self, filetype, **kwargs):
         ''' Return the component name, if given.