Contents
import pandas as pd

# Load the candidate table stored under the key 'candidate_df' in the
# HDF5 file.
filename = 'gd1_candidates.hdf5'
candidate_df = pd.read_hdf(filename, 'candidate_df')
from astropy.table import Table

candidate_table = Table.from_pandas(candidate_df)
type(candidate_table)
astropy.table.table.Table
# Keep only the `source_id` column and save it as a VOTable file; this is the
# file the Gaia job below uploads under the table name `candidate_df`.
# overwrite=True replaces a file left over from an earlier run.
table = candidate_table[['source_id']]
table.write('candidate_df.xml', format='votable', overwrite=True)
# Join the uploaded candidate table (referred to as tap_upload.candidate_df)
# with gaiadr2.panstarrs1_best_neighbour on source_id.
# `SELECT *` returns the columns of both sides of the join, which is why the
# result shown below carries a second id column, `source_id_2`.
query = """SELECT *
FROM gaiadr2.panstarrs1_best_neighbour as best
JOIN tap_upload.candidate_df as candidate_df
ON best.source_id = candidate_df.source_id
"""
from astroquery.gaia import Gaia

# Submit the query as an asynchronous job. The VOTable file written earlier
# is uploaded with it and is what the query refers to as
# tap_upload.candidate_df.
job = Gaia.launch_job_async(
    query=query,
    upload_resource='candidate_df.xml',
    upload_table_name='candidate_df',
)
Created TAP+ (v1.2.1) - Connection:
	Host: gea.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Created TAP+ (v1.2.1) - Connection:
	Host: geadata.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
INFO: Query finished. [astroquery.utils.tap.core]
results = job.get_results()
results
Table length=3724
source_idoriginal_ext_source_idangular_distancenumber_of_neighboursnumber_of_matesbest_neighbour_multiplicitygaia_astrometric_paramssource_id_2
arcsec
int64int64float64int32int16int16int16int64
6358602187266581761309113851876713490.0536670358954670841015635860218726658176
6356741263839655681308313884284887200.0388102681415775161015635674126383965568
6355354547749830401306313783776573690.0343230288289910761015635535454774983040
6354972768103136001308113804456319300.047202554132500061015635497276810313600
6356141686401328641305713959221401350.0203041897099641431015635614168640132864
6355986079743697921303413920912795130.0365246268534030541015635598607974369792
6357376618354965761310013993335021360.0366268278207166061015635737661835496576
6358509458927486721320113986549341470.0211787423933783961015635850945892748672
6356005321197136641304213922858936230.045188209150430151015635600532119713664
........................
6122417812491246081297513437559955610.042357158300018151015612241781249124608
6123321473614430721301413414585387770.022652498590129771015612332147361443072
6124267440168024321305213468524656560.032476530099618431015612426744016802432
6123317393403417601301113412177938390.0360642408180257351015612331739340341760
6122827380582649601297413404459335190.0252932373534968981015612282738058264960
6123863326686976001303513545702197740.020103160014030861015612386332668697600
6122961727178186241296913380061687800.0512642120258362051015612296172717818624
6122503754801017601297413464758974640.0317837403475309051015612250375480101760
6123949268991591681305813551997517950.040191748305466981015612394926899159168
6122564185004231681299313490752973100.0092427896695131561015612256418500423168
# Keep the Gaia id and the matching external-catalogue id, and save them as a
# VOTable file; the second query uploads this file as `external` and joins
# on `original_ext_source_id`.
table_ext = results[['source_id', 'original_ext_source_id']]
table_ext.write('external.xml', format='votable', overwrite=True)
# For every uploaded row, fetch the g and i mean PSF magnitudes from
# gaiadr2.panstarrs1_original_valid by matching its obj_id against
# original_ext_source_id. The Gaia source_id is carried along so the
# photometry can later be matched back to the candidate table.
query2 = """SELECT
external.source_id, ps.g_mean_psf_mag, ps.i_mean_psf_mag
FROM gaiadr2.panstarrs1_original_valid as ps
JOIN tap_upload.external as external
ON ps.obj_id = external.original_ext_source_id
"""
# Submit the photometry query as an asynchronous job, uploading
# external.xml so the query can refer to it as tap_upload.external.
job2 = Gaia.launch_job_async(
    query=query2,
    upload_resource='external.xml',
    upload_table_name='external',
)
INFO: Query finished. [astroquery.utils.tap.core]
results2 = job2.get_results()
results2
Table length=3724
source_idg_mean_psf_magi_mean_psf_mag
mag
int64float64float64
63586021872665817617.897800445556617.5174007415771
63567412638396556819.287300109863317.6781005859375
63553545477498304016.923799514770516.478099822998
63549727681031360019.924200057983418.3339996337891
63561416864013286416.151599884033214.6662998199463
63559860797436979216.522399902343816.1375007629395
63573766183549657614.503299713134813.9849004745483
63585094589274867216.517499923706116.0450000762939
63560053211971366420.450599670410219.5177001953125
.........
61224178124912460820.234399795532218.6518001556396
61233214736144307221.384899139404320.3076000213623
61242674401680243217.828100204467817.4281005859375
61233173934034176021.865699768066419.5223007202148
61228273805826496022.515199661254919.9743995666504
61238633266869760019.379299163818417.9923000335693
61229617271781862417.494400024414116.926700592041
61225037548010176015.333000183105514.6280002593994
61239492689915916816.441400527954115.8212003707886
61225641850042316820.871599197387719.9612007141113
# Save the photometry results to disk so they can be read back without
# re-running the queries. overwrite=True replaces an existing file.
# NOTE(review): no `format` is given, so the output format is presumably
# inferred from the `.fits` extension - confirm.
filename = 'gd1_photo.fits'
results2.write(filename, overwrite=True)
from astropy.table import Table

photo_table = Table.read(filename)
len(photo_table)
3724
photo_table
Table length=3724
source_idg_mean_psf_magi_mean_psf_mag
mag
int64float64float64
63586021872665817617.897800445556617.5174007415771
63567412638396556819.287300109863317.6781005859375
63553545477498304016.923799514770516.478099822998
63549727681031360019.924200057983418.3339996337891
63561416864013286416.151599884033214.6662998199463
63559860797436979216.522399902343816.1375007629395
63573766183549657614.503299713134813.9849004745483
63585094589274867216.517499923706116.0450000762939
63560053211971366420.450599670410219.5177001953125
.........
61224178124912460820.234399795532218.6518001556396
61233214736144307221.384899139404320.3076000213623
61242674401680243217.828100204467817.4281005859375
61233173934034176021.865699768066419.5223007202148
61228273805826496022.515199661254919.9743995666504
61238633266869760019.379299163818417.9923000335693
61229617271781862417.494400024414116.926700592041
61225037548010176015.333000183105514.6280002593994
61239492689915916816.441400527954115.8212003707886
61225641850042316820.871599197387719.9612007141113
import matplotlib.pyplot as plt

def plot_cmd(photo_table):
    """Draw a colour-magnitude diagram using the pyplot interface.

    photo_table: object indexable by column name that provides the
        columns 'g_mean_psf_mag' and 'i_mean_psf_mag'.

    The g magnitude is plotted on the y axis against the g - i colour
    on the x axis. The view is limited to 0..1.5 in x and 14..22 in y,
    and the y axis is inverted. Nothing is returned.
    """
    g_mag = photo_table['g_mean_psf_mag']
    i_mag = photo_table['i_mean_psf_mag']
    g_minus_i = g_mag - i_mag

    # Very small, mostly transparent black dots keep dense regions readable.
    plt.plot(g_minus_i, g_mag, 'ko', markersize=0.3, alpha=0.3)

    plt.xlabel(r'$(g-i)_0$')
    plt.ylabel(r'$g_0$')

    plt.xlim(0, 1.5)
    plt.ylim(14, 22)

    # Flip the y axis after the limits are set, so that smaller magnitudes
    # end up at the top of the figure.
    plt.gca().invert_yaxis()
plot_cmd(photo_table)
_images/pilot_day4_17_0.png
import matplotlib as mpl

mpl.get_backend()
'module://ipykernel.pylab.backend_inline'
mpl.rcParams['backend']
'module://ipykernel.pylab.backend_inline'
import matplotlib as mpl

# NOTE: Don't do this.  

mpl.use('TkAgg')
plot_cmd(photo_table)
coords = plt.ginput(10)
_images/pilot_day4_20_0.png
mpl.use('agg')
# Vertices of the selection polygon as (x, y) pairs in the axes used by
# plot_cmd: x is the g - i colour, y is the g magnitude.
# NOTE(review): these look like the ten points captured by the
# `plt.ginput(10)` call above, pasted in as a literal so the notebook can be
# re-run without the interactive step - confirm.
coords = [(0.2643369175627239, 17.84253127299485),
 (0.3539426523297491, 18.799116997792495),
 (0.47491039426523296, 19.682119205298015),
 (0.6317204301075269, 20.454746136865342),
 (0.7661290322580645, 20.785871964679913),
 (0.8064516129032258, 21.41133186166299),
 (0.5869175627240143, 21.300956585724798),
 (0.39426523297491034, 20.565121412803535),
 (0.22401433691756267, 19.240618101545255),
 (0.19713261648745517, 18.02649006622517)]
coords
[(0.2643369175627239, 17.84253127299485),
 (0.3539426523297491, 18.799116997792495),
 (0.47491039426523296, 19.682119205298015),
 (0.6317204301075269, 20.454746136865342),
 (0.7661290322580645, 20.785871964679913),
 (0.8064516129032258, 21.41133186166299),
 (0.5869175627240143, 21.300956585724798),
 (0.39426523297491034, 20.565121412803535),
 (0.22401433691756267, 19.240618101545255),
 (0.19713261648745517, 18.02649006622517)]
import numpy as np

# Turn the list of (x, y) pairs into one sequence of x values and one of
# y values, then draw the polygon's vertices as a line on top of the
# colour-magnitude diagram.
xs, ys = np.transpose(coords)
plot_cmd(photo_table)
# The trailing semicolon keeps the notebook from echoing the return value.
plt.plot(xs, ys);
_images/pilot_day4_25_0.png
from matplotlib.path import Path

# Build a matplotlib Path from the polygon vertices and try it on two sample
# points; contains_points returns one boolean per point (see output below).
path = Path(coords)
points = [(0.4, 20), (0.4, 16)]
path.contains_points(points)
array([ True, False])
photo_table
Table length=3724
source_idg_mean_psf_magi_mean_psf_mag
mag
int64float64float64
63586021872665817617.897800445556617.5174007415771
63567412638396556819.287300109863317.6781005859375
63553545477498304016.923799514770516.478099822998
63549727681031360019.924200057983418.3339996337891
63561416864013286416.151599884033214.6662998199463
63559860797436979216.522399902343816.1375007629395
63573766183549657614.503299713134813.9849004745483
63585094589274867216.517499923706116.0450000762939
63560053211971366420.450599670410219.5177001953125
.........
61224178124912460820.234399795532218.6518001556396
61233214736144307221.384899139404320.3076000213623
61242674401680243217.828100204467817.4281005859375
61233173934034176021.865699768066419.5223007202148
61228273805826496022.515199661254919.9743995666504
61238633266869760019.379299163818417.9923000335693
61229617271781862417.494400024414116.926700592041
61225037548010176015.333000183105514.6280002593994
61239492689915916816.441400527954115.8212003707886
61225641850042316820.871599197387719.9612007141113
import pandas as pd

candidate_df = pd.read_hdf('gd1_candidates.hdf5', 'candidate_df')
candidate_df.head()
source_id ra dec pmra pmdec parallax parallax_error radial_velocity phi1 phi2 pm_phi1 pm_phi2
0 635559124339440000 137.586717 19.196544 -3.770522 -12.490482 0.791393 0.271754 NaN -59.630489 -1.216485 -7.361363 -0.592633
1 635860218726658176 138.518707 19.092339 -5.941679 -11.346409 0.307456 0.199466 NaN -59.247330 -2.016078 -7.527126 1.748779
2 635674126383965568 138.842874 19.031798 -3.897001 -12.702780 0.779463 0.223692 NaN -59.133391 -2.306901 -7.560608 -0.741800
3 635535454774983040 137.837752 18.864007 -4.335041 -14.492309 0.314514 0.102775 NaN -59.785300 -1.594569 -9.357536 -1.218492
4 635497276810313600 138.044516 19.009471 -7.172931 -12.291499 0.425404 0.337689 NaN -59.557744 -1.682147 -9.000831 2.334407
len(candidate_df), len(photo_table)
(7346, 3724)

Hint: combining two tables on a shared column (here, `source_id`) is a join.

type(candidate_df)
pandas.core.frame.DataFrame
type(photo_table)
astropy.table.table.Table
photo_df = photo_table.to_pandas()
type(photo_df)
pandas.core.frame.DataFrame

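left: keep all the rows from the left table, adding data from the right table where the key matches

right: keep all the rows from the right table, adding data from the left table where the key matches

inner: keep only the rows whose key appears in both tables (intersection)

outer: keep all the rows from either table (union)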

# Left join on `source_id`: every row of candidate_df is kept, and the
# photometry columns are filled in where photo_df has a matching row.
# Candidates without a match get NaN in those columns (see the first row of
# the output below).
merged = pd.merge(candidate_df, photo_df,
                  on='source_id',
                  how='left')
len(merged)
7346
len(candidate_df)
7346
merged.head()
source_id ra dec pmra pmdec parallax parallax_error radial_velocity phi1 phi2 pm_phi1 pm_phi2 g_mean_psf_mag i_mean_psf_mag
0 635559124339440000 137.586717 19.196544 -3.770522 -12.490482 0.791393 0.271754 NaN -59.630489 -1.216485 -7.361363 -0.592633 NaN NaN
1 635860218726658176 138.518707 19.092339 -5.941679 -11.346409 0.307456 0.199466 NaN -59.247330 -2.016078 -7.527126 1.748779 17.8978 17.517401
2 635674126383965568 138.842874 19.031798 -3.897001 -12.702780 0.779463 0.223692 NaN -59.133391 -2.306901 -7.560608 -0.741800 19.2873 17.678101
3 635535454774983040 137.837752 18.864007 -4.335041 -14.492309 0.314514 0.102775 NaN -59.785300 -1.594569 -9.357536 -1.218492 16.9238 16.478100
4 635497276810313600 138.044516 19.009471 -7.172931 -12.291499 0.425404 0.337689 NaN -59.557744 -1.682147 -9.000831 2.334407 19.9242 18.334000
# Add the two quantities plotted by plot_cmd as columns: `mag` is the g
# magnitude and `color` is g - i. Rows without photometry stay NaN.
merged['mag'] = merged['g_mean_psf_mag']
merged['color'] = merged['g_mean_psf_mag'] - merged['i_mean_psf_mag']
merged.columns
Index(['source_id', 'ra', 'dec', 'pmra', 'pmdec', 'parallax', 'parallax_error',
       'radial_velocity', 'phi1', 'phi2', 'pm_phi1', 'pm_phi2',
       'g_mean_psf_mag', 'i_mean_psf_mag', 'mag', 'color'],
      dtype='object')
merged['color'].notnull().sum()
3724
merged['color'].isnull().sum()
3622

Next step: select the rows where the photometry data falls in the polygon we identified

from matplotlib.path import Path

path = Path(coords)
path
Path(array([[ 0.26433692, 17.84253127],
       [ 0.35394265, 18.799117  ],
       [ 0.47491039, 19.68211921],
       [ 0.63172043, 20.45474614],
       [ 0.76612903, 20.78587196],
       [ 0.80645161, 21.41133186],
       [ 0.58691756, 21.30095659],
       [ 0.39426523, 20.56512141],
       [ 0.22401434, 19.2406181 ],
       [ 0.19713262, 18.02649007]]), None)
points = [(0.4, 20), (0.4, 16)]
path.contains_points(points)
array([ True, False])
points = merged[['color', 'mag']]    # select `color` and `mag`
points.head()
color mag
0 NaN NaN
1 0.3804 17.8978
2 1.6092 19.2873
3 0.4457 16.9238
4 1.5902 19.9242
# One boolean per row of `points`; it is used below as a row mask on `merged`.
# NOTE(review): rows with missing photometry have NaN coordinates and are
# presumably reported as outside the polygon - verify.
is_inside = path.contains_points(points)
is_inside.sum()
481
selected2 = merged[is_inside]
selected2.head()
source_id ra dec pmra pmdec parallax parallax_error radial_velocity phi1 phi2 pm_phi1 pm_phi2 g_mean_psf_mag i_mean_psf_mag mag color
52 636170384085347968 136.762984 18.627660 -4.929947 -12.324957 0.080572 0.774262 NaN -60.526296 -0.857776 -7.890487 0.556206 20.595800 20.0585 20.595800 0.537300
58 636199658582021632 136.690129 18.973757 -3.199581 -13.434429 0.062917 0.320584 NaN -60.268864 -0.616365 -7.907829 -1.483160 19.345800 19.0042 19.345800 0.341600
78 636363795052181888 136.586430 19.402934 -3.360965 -13.160503 0.156392 0.436150 NaN -59.956071 -0.306603 -7.748759 -1.180119 19.855801 19.3862 19.855801 0.469601
84 636456703784789248 136.061195 19.322230 -5.691713 -11.289811 -0.026094 0.377111 NaN -60.285543 0.072223 -7.407813 1.842187 19.303400 18.8925 19.303400 0.410900
88 636563700010029312 136.320112 19.709247 -5.901632 -12.474899 -0.654865 0.766358 NaN -59.828026 0.068110 -8.503892 1.377714 20.515301 20.0103 20.515301 0.505001
mpl.use('agg')
%matplotlib inline
plot_cmd(photo_table)
plt.plot(xs, ys)

#plt.plot(selected2['color'], selected2['mag'], 'gx');
[<matplotlib.lines.Line2D at 0x7f4603e2c130>]
_images/pilot_day4_56_1.png
# Plot the rows selected by the polygon in phi1/phi2 coordinates.
# A wide, short figure is used for this plot.
plt.figure(figsize=(10, 2.5))

x = selected2['phi1']
y = selected2['phi2']

plt.plot(x, y, 'ko', markersize=0.6, alpha=0.6)

plt.xlabel('phi1 (degree GD1)')
plt.ylabel('phi2 (degree GD1)')

# Use the same scale on both axes; the trailing semicolon keeps the notebook
# from echoing the return value.
plt.axis('equal');
_images/pilot_day4_57_0.png
filename = 'gd1_merged.hdf5'

# Write both tables into one HDF5 file under separate keys.
# The first write uses mode='w', which starts a fresh file and replaces any
# copy left over from a previous run. That does the job of deleting the file
# first, without a shell command that fails when the file does not exist.
# The second write uses mode='a' so it is added to the same file.
# `key` is passed by keyword because recent pandas versions deprecate
# passing it positionally.
merged.to_hdf(filename, key='merged', mode='w')
selected2.to_hdf(filename, key='selected', mode='a')
!ls -lh gd1_merged.hdf5
-rw-rw-r-- 1 downey downey 1.1M Nov 20 14:09 gd1_merged.hdf5
len(merged)
7346
len(selected2)
481
# Wrap the polygon vertices in a DataFrame so they can be stored in an
# HDF5 file.
coords_df = pd.DataFrame(coords)
filename = 'gd1_polygon.hdf5'

# `key` is passed by keyword because recent pandas versions deprecate
# passing it positionally.
coords_df.to_hdf(filename, key='coords_df')
import os
from wget import download
# Fetch the notebook for the next lesson and print whatever `download`
# returns.
# NOTE(review): judging by the output below, that is the name of the saved
# file, and an existing file is not overwritten (a " (1)" suffix is added
# instead) - confirm.
url = 'https://raw.githubusercontent.com/AllenDowney/AstronomicalData/main/class_notebooks/lesson_7.ipynb'
print(download(url))
lesson_7 (1).ipynb