Update the make_report module to print the best-fit duration and depth; fix the requirements for remote build systems such as Travis; add extra metadata to the FITS output

emprice · emprice · commit fdf045f53e66 · 2014-08-05T16:17:53.000-04:00
diff --git a/.travis.yml b/.travis.yml
@@ -14,8 +14,7 @@ before_install:
 # Install packages
 install:
   - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy scipy matplotlib dateutil pip pandas=0.13.0
-  - pip install -r python/requirements.txt
-  - pip install openpyxl==1.8.6
+  - pip install -r python/requirements.txt --allow-external pil --allow-unverified pil
 
 # Run test
 script:
diff --git a/python/clean_signal.py b/python/clean_signal.py
@@ -22,10 +22,10 @@ class NonIntegerClustersError(Exception):
 
 def clean_signal(time, flux, dtime, dflux, dfluxerr, out):
     '''
-    Remove possible eclipsing binary signals from a light curve. This works best
-    on deep, strongly periodic signals, so it is unlikely to clean transit signals
-    (though it sometimes will). This should help BLS pulse find less prominent
-    signals in the same data.
+    Remove possible eclipsing binary signals from a light curve. This works
+    best on deep, strongly periodic signals, so it is unlikely to clean
+    transit signals (though it sometimes will). This should help BLS pulse
+    find less prominent signals in the same data.
 
     :param time: Raw time vector (no detrending or binning)
     :type time: np.ndarray
@@ -42,15 +42,17 @@ def clean_signal(time, flux, dtime, dflux, dfluxerr, out):
     '''
     # We restrict the "standard deviation" of the cluster to be 5% of the
     # size of the space.
-    size = max(np.nanmax(np.absolute(out['depth_dip'])), np.nanmax(out['depth_blip']))
+    size = max(np.nanmax(np.absolute(out['depth_dip'])),
+        np.nanmax(out['depth_blip']))
     mean_flux_err = 0.05 * size
 
-    # Construct an array of all the useful quantities. We will only be finding
-    # clusters in the first two dimensions! The other dimensions are for bookkeeping.
+    # Construct an array of all the useful quantities. We will only be
+    # finding clusters in the first two dimensions! The other dimensions are
+    # for bookkeeping.
     ndx = np.where((out['srsq_dip'] > 0.) & (out['srsq_blip'] > 0.))
     X = np.column_stack((out['depth_dip'][ndx], out['depth_blip'][ndx],
-        out['duration_dip'][ndx], out['duration_blip'][ndx], out['midtime_dip'][ndx],
-        out['midtime_blip'][ndx]))
+        out['duration_dip'][ndx], out['duration_blip'][ndx],
+        out['midtime_dip'][ndx], out['midtime_blip'][ndx]))
 
     metric = lambda x, y: np.sqrt((x[0] - y[0])**2. / mean_flux_err**2. +
         (x[1] - y[1])**2. / mean_flux_err**2.)
@@ -95,18 +97,19 @@ def clean_signal(time, flux, dtime, dflux, dfluxerr, out):
             class_member_mask & core_samples_mask, err_flux=mean_flux_err)
     except NoClustersError:
         # We didn't find any clusters at all. This is a good place to stop.
-        logger.info('DBSCAN did not resolve any clusters in the depth_dip/period '
-            'space; stopping algorithm.')
+        logger.info('DBSCAN did not resolve any clusters in the '
+            'depth_dip/period space; stopping algorithm.')
         raise RuntimeError
     except NonIntegerClustersError:
         # Something weird is going on. Try one more time with a step of 2,
         # then quit, marking this system for further consideration.
         try:
-            best_period, best_duration, best_phase = __do_period_search(X, time,
-                class_member_mask & core_samples_mask, err_flux=mean_flux_err, step=2)
+            best_period, best_duration, best_phase = __do_period_search(X,
+                time, class_member_mask & core_samples_mask,
+                err_flux=mean_flux_err, step=2)
         except (NoClustersError, NonIntegerClustersError):
-            logger.warning('DBSCAN found multiple clusters that do not look like '
-                'integer multiples; investigate!')
+            logger.warning('DBSCAN found multiple clusters that do not look '
+                'like integer multiples; investigate!')
             raise RuntimeError
 
     def boxcar(time, duration, depth, P, phase):
@@ -119,7 +122,8 @@ def boxcar(time, duration, depth, P, phase):
 
         return flux
 
-    p0 = np.array([best_duration, depth, best_period, best_phase], dtype='float64')
+    p0 = np.array([best_duration, depth, best_period, best_phase],
+        dtype='float64')
     logger.info('Best guess boxcar parameters:\n\t' + str(p0))
 
     ndx = np.where(np.isfinite(dflux))
@@ -128,6 +132,7 @@ def boxcar(time, duration, depth, P, phase):
     logger.info('Best fit boxcar parameters:\n\t' + str(pbest))
 
     best_duration = pbest[0]
+    best_depth = pbest[1]
     best_period = pbest[2]
     best_phase = pbest[3]
 
@@ -136,7 +141,8 @@ def boxcar(time, duration, depth, P, phase):
         (pftime < best_phase + 2. * best_duration))
     flux[ndx] = np.nan
 
-    return dict(period=best_period, duration=best_duration, phase=best_phase)
+    return dict(period=best_period, duration=best_duration, depth=best_depth,
+        phase=best_phase)
 
 
 def __do_period_search(X, time, mask, step=1, err_midtime=0.1, err_flux=0.01,
@@ -150,9 +156,9 @@ def __do_period_search(X, time, mask, step=1, err_midtime=0.1, err_flux=0.01,
     metric = lambda x, y: np.sqrt((x[0] - y[0])**2. / err_flux**2. + \
         (x[1] - y[1])**2. / err_midtime**2.)
 
-    # Search for clusters a second time, this time to identify the period. We expect
-    # a cluster around the mean value and less significant ones around integer
-    # multiples of that value.
+    # Search for clusters a second time, this time to identify the period.
+    # We expect a cluster around the mean value and less significant ones
+    # around integer multiples of that value.
     try:
         db = DBSCAN(eps=1., min_samples=10, metric=metric).fit(Y[:,0:2])
     except ValueError:
@@ -167,9 +173,9 @@ def __do_period_search(X, time, mask, step=1, err_midtime=0.1, err_flux=0.01,
     n_clusters_ = len(unique_labels) - (1 if -1 in labels else 0)
 
     if n_clusters_ == 1:
-        # This is the best-case scenario. The best choice for the period is just
-        # the mean of the consecutive differences. The phase and duration follow
-        # easily.
+        # This is the best-case scenario. The best choice for the period is
+        # just the mean of the consecutive differences. The phase and
+        # duration follow easily.
         class_member_mask = (labels != -1)
 
         best_period = np.mean(Y[class_member_mask & core_samples_mask][:,1])
@@ -197,8 +203,8 @@ def __do_period_search(X, time, mask, step=1, err_midtime=0.1, err_flux=0.01,
             candidate_periods.append([np.mean(Y[class_member_mask &
                 core_samples_mask][:,1]), kk])
 
-        # Check for integer multiples in the candidate periods list. The modulus
-        # by the minimum one should be sufficient.
+        # Check for integer multiples in the candidate periods list. The
+        # modulus by the minimum one should be sufficient.
         candidate_periods = np.array(candidate_periods)
         min_period = np.amin(candidate_periods[:,0])
         mods = np.mod(candidate_periods[:,0], min_period)
diff --git a/python/fits_output.py b/python/fits_output.py
@@ -14,22 +14,24 @@ def __init__(self):
         '''
         Initialize the object.
         '''
+        self.prihdr = None
         self.prihdu = None
         self.cfghdu = None
         self.ext_list = []
 
 
-    def make_header(self, kic_id):
+    def make_header(self, kic_cadence_id):
         '''
         Create the FITS header. Currently, only the KIC ID of the relevant star
         is saved in this header, but other fields will be added later.
 
         :param kic_id: KIC ID of this star
         :type kic_id: str
         '''
-        prihdr = pyfits.Header()
-        prihdr['KIC_ID'] = kic_id
-        self.prihdu = pyfits.PrimaryHDU(header=prihdr)
+        self.prihdr = pyfits.Header()
+        kic_id, cadence = kic_cadence_id.split('_')
+        self.prihdr['KIC_ID'] = kic_id
+        self.prihdr['CADENCE'] = 'long' if cadence == 'llc' else 'short'
 
 
     def push_bls_output(self, bls_out):
@@ -67,17 +69,39 @@ def push_detrended_lightcurve(self, time, flux, fluxerr, clean_out=None):
         :param clean_out: Unprocessed output from ``clean_signal``
         :type clean_out: dict
         '''
+        hdr = pyfits.Header()
+
         if clean_out is not None:
-            hdr = pyfits.Header()
             keys = clean_out.keys()
             vals = clean_out.values()
 
             for k, v in zip(keys, vals):
                 hdr[k] = v
-        else:
-            hdr = None
 
-        columns = [pyfits.Column(name='Time', array=time, format='D'),
+            try:
+                hdr.comments['period'] = 'period of strongest signal [days]'
+            except KeyError:
+                pass
+
+            try:
+                hdr.comments['phase'] = 'phase of strongest periodic ' \
+                    'signal [XXXXX]'
+            except KeyError:
+                pass
+
+            try:
+                hdr.comments['duration'] = 'duration of strongest ' \
+                    'periodic signal [days]'
+            except KeyError:
+                pass
+
+            try:
+                hdr.comments['depth'] = 'depth of strongest periodic signal'
+            except KeyError:
+                pass
+
+        columns = [pyfits.Column(name='Time', array=time, format='D',
+            unit='BJD - 2454833'),
             pyfits.Column(name='Flux', array=flux, format='D'),
             pyfits.Column(name='Flux error', array=fluxerr, format='D')]
         cols = pyfits.ColDefs(columns)
@@ -100,7 +124,11 @@ def push_config(self, config):
         columns = [pyfits.Column(name='Parameter', array=keys, format='A20'),
             pyfits.Column(name='Value', array=vals, format='A20')]
         cols = pyfits.ColDefs(columns)
-        self.cfghdu = pyfits.TableHDU.from_columns(cols)
+
+        hdr = pyfits.Header()
+        hdr['EXTNAME'] = 'INPUT_PARAMS'
+
+        self.cfghdu = pyfits.TableHDU.from_columns(cols, header=hdr)
 
 
     def write_file(self, fname, clobber=False):
@@ -109,13 +137,27 @@ def write_file(self, fname, clobber=False):
 
         :param fname: The name of the file to save
         :type fname: str
-        :param clobber: Whether to clobber an existing output file; passed directly to pyfits
+        :param clobber: Whether to clobber an existing output file; passed
+            directly to pyfits
         :type clobber: bool
         '''
-        if self.prihdu is not None:
-            hdus = [self.prihdu]
+        hdus = []
+
+        if self.prihdr is not None:
+            self.prihdr['N_EXTEN'] = (len(self.ext_list) + 1,
+                '(n_passes * 2) + 1')
+            self.prihdu = pyfits.PrimaryHDU(header=self.prihdr)
+            hdus.append(self.prihdu)
 
         if len(self.ext_list) > 0:
+            j = len(self.ext_list) / 2
+
+            for i in xrange(0,len(self.ext_list),2):
+                self.ext_list[i].header['EXTNAME'] = 'BLIP-DIP_Pass_%02d' % j
+                self.ext_list[i+1].header['EXTNAME'] = 'TIME-FLUX_Pass_%02d' % j
+
+                j -= 1
+
             hdus.extend(self.ext_list)
 
         if self.cfghdu is not None:
diff --git a/python/postprocessing/make_report.py b/python/postprocessing/make_report.py
@@ -92,6 +92,7 @@ def __init_parser():
         period = lchdr['period']
         phase = lchdr['phase']
         duration = lchdr['duration']
+        depth = lchdr['depth']
 
         pftime = np.mod(time, period)
         signal_mask = ((pftime > phase - 0.5 * duration) &
@@ -116,7 +117,11 @@ def __init_parser():
         plt.xlabel(r'Time (days)')
         plt.ylabel(r'Flux')
         plt.figtext(0.05, 0.02,
-            r'P = %.4f, phi = %.2f' % (period, phase / period))
+            r'P = %.4f, phi = %.2f, W = %.2f, delta = %.2g' % (period,
+            phase / period, duration, depth))
+
+        plt.tight_layout()
+        plt.subplots_adjust(bottom=0.15)
     except KeyError:
         plt.subplot(111)
         plt.scatter(time, flux, color=color1, edgecolor=edgecolor,
@@ -127,7 +132,7 @@ def __init_parser():
         plt.ylabel(r'Flux')
         plt.title(r'KIC' + kic_id + r', pass #%d' % count)
 
-    plt.tight_layout()
+        plt.tight_layout()
 
     imgdata = cStringIO.StringIO()
     fig.savefig(imgdata, format='png')
diff --git a/python/requirements.txt b/python/requirements.txt
@@ -1,6 +1,7 @@
 numpy
 scipy
-pandas
+openpyxl==1.8.6
+pandas==0.13.0
 matplotlib
 cython
 pyfits