From b03adcd7f10b398573900a80463b785e695a6191 Mon Sep 17 00:00:00 2001
From: Allen Downey
The following cells download the data from the previous lesson, if necessary, and load it into a Pandas DataFrame.
import os
-from wget import download
+from os.path import basename, exists
-filename = 'gd1_data.hdf'
-path = 'https://github.com/AllenDowney/AstronomicalData/raw/main/data/'
+def download(url):
+ filename = basename(url)
+ if not exists(filename):
+ from urllib.request import urlretrieve
+ local, _ = urlretrieve(url, filename)
+ print('Downloaded ' + local)
-if not os.path.exists(filename):
- print(download(path+filename))
+download('https://github.com/AllenDowney/AstronomicalData/raw/main/' +
+ 'data/gd1_data.hdf')
import pandas as pd
+filename = 'gd1_data.hdf'
+
centerline_df = pd.read_hdf(filename, 'centerline_df')
selected_df = pd.read_hdf(filename, 'selected_df')
<scipy.spatial.qhull.ConvexHull at 0x7fb34c626a60>
+<scipy.spatial.qhull.ConvexHull at 0x7f712e3b6dc0>
This use of transpose is a bit of a NumPy trick. Because pm_vertices has two columns, its transpose has two rows, which are assigned to the two variables pmra_poly and pmdec_poly.
This use of transpose is a useful NumPy idiom. Because pm_vertices has two columns, its matrix transpose has two rows, which are assigned to the two variables pmra_poly and pmdec_poly.
The following figure shows proper motion in ICRS again, along with the convex hull we just computed.
+
So pm_vertices represents the polygon we want to select.
@@ -684,10 +689,10 @@ Here’s the function from Lesson 2 we used to do that.
Here again are the columns we want to select.
+Here are the columns we want to select.
columns = 'source_id, ra, dec, pmra, pmdec, parallax'
+columns = 'source_id, ra, dec, pmra, pmdec'
SELECT
-source_id, ra, dec, pmra, pmdec, parallax
+source_id, ra, dec, pmra, pmdec
FROM gaiadr2.gaia_source
WHERE parallax < 1
AND bp_rp BETWEEN -0.75 AND 2
@@ -789,7 +794,7 @@ Using flatten
SELECT
-source_id, ra, dec, pmra, pmdec, parallax
+source_id, ra, dec, pmra, pmdec
FROM gaiadr2.gaia_source
WHERE parallax < 1
AND bp_rp BETWEEN -0.75 AND 2
@@ -812,7 +817,17 @@ WHERE parallax < 1
-INFO: Query finished. [astroquery.utils.tap.core]
+Created TAP+ (v1.2.1) - Connection:
+ Host: gea.esac.esa.int
+ Use HTTPS: True
+ Port: 443
+ SSL Port: 443
+Created TAP+ (v1.2.1) - Connection:
+ Host: geadata.esac.esa.int
+ Use HTTPS: True
+ Port: 443
+ SSL Port: 443
+INFO: Query finished. [astroquery.utils.tap.core]
<Table length=7345>
name dtype unit description
--------- ------- -------- ------------------------------------------------------------------
@@ -821,11 +836,10 @@ source_id int64 Unique source identifier (unique within a particular
dec float64 deg Declination
pmra float64 mas / yr Proper motion in right ascension direction
pmdec float64 mas / yr Proper motion in declination direction
- parallax float64 mas Parallax
-Jobid: 1610562623566O
+Jobid: 1615824245107O
Phase: COMPLETED
Owner: None
-Output file: async_20210113133023.vot
+Output file: async_20210315120405.vot
Results: None
@@ -863,12 +877,12 @@ Results: None
-
+
Here we can see why it was useful to transform these coordinates. In ICRS, it is more difficult to identify the stars near the centerline of GD-1.
-So let’s transform the results back to the GD-1 frame.
-Here’s the code we used to transform the coordinates and make a Pandas DataFrame, wrapped in a function.
+So let’s transform the results back to the GD-1 frame.
+Here’s the code we used to transform the coordinates and make a Pandas DataFrame, wrapped in a function.
from gala.coordinates import reflex_correct
@@ -923,11 +937,11 @@ Results: None
-
+
-We’re starting to see GD-1 more clearly.
-We can compare this figure with this panel from Figure 1 from the original paper:
+We’re starting to see GD-1 more clearly.
+We can compare this figure with this panel from Figure 1 from the original paper:
This panel shows stars selected based on proper motion only, so it is comparable to our figure (although notice that it covers a wider region).
In the next lesson, we will use photometry data from Pan-STARRS to do a second round of filtering, and see if we can replicate this panel.
@@ -947,23 +961,22 @@ Results: None
We can use ls to confirm that the file exists and check the size:
We can use getsize to confirm that the file exists and check the size:
!ls -lh gd1_data.hdf
+from os.path import getsize
+
+MB = 1024 * 1024
+getsize(filename) / MB
-rw-rw-r-- 1 downey downey 3.3M Jan 13 13:32 gd1_data.hdf
+2.575897216796875
If you are using Windows, ls might not work; in that case, try:
!dir gd1_data.hdf
-We can check the file size like this:
!ls -lh gd1_data.csv
+getsize('gd1_data.csv') / MB
-rw-rw-r-- 1 downey downey 1.4M Jan 13 13:33 gd1_data.csv
+1.238297462463379
We can see the first few lines like this:
!head -3 gd1_data.csv
+def head(filename, n=3):
+ with open(filename) as fp:
+ for i in range(n):
+ print(next(fp))
+
+
+head('gd1_data.csv')
,source_id,ra,dec,pmra,pmdec,parallax,phi1,phi2,pm_phi1,pm_phi2
-0,635559124339440000,137.58671691646745,19.1965441084838,-3.770521900009566,-12.490481778113859,0.7913934419894347,-59.63048941944402,-1.2164852515042963,-7.361362712597496,-0.592632882064492
-1,635860218726658176,138.5187065217173,19.09233926905897,-5.941679495793577,-11.346409129876392,0.30745551377348623,-59.247329893833296,-2.016078400820631,-7.527126084640531,1.7487794924176672
+,source_id,ra,dec,pmra,pmdec,phi1,phi2,pm_phi1,pm_phi2
+
+0,635559124339440000,137.58671691646745,19.1965441084838,-3.770521900009566,-12.490481778113859,-59.63048941944402,-1.2164852515042963,-7.361362712597496,-0.592632882064492
+
+1,635860218726658176,138.5187065217173,19.09233926905897,-5.941679495793577,-11.346409129876392,-59.247329893833296,-2.016078400820631,-7.527126084640531,1.7487794924176672
N$uD<&ZH*9^>(i)+u64`WTGKfQ==@_nep36RKf{)V
z7 ~#U|0b*1P3I3I7@DLbQG;2mQgc(jB%|nGxK9d$J_mb1N9P<
zJKh)N-re2I^`Vniv9VDu-cC+VvcBb6S%JD&o2q#_wFyAey4MLbZWgD_U79xiNki_wYzbyMRu`
z3wa9?hP0aB!}>t?BMk77n1m!XEzR%!^zY6xZjurD;y%%l=wDX++yAMKD@X9SALrpy
z<<@8MRpHCr$eOdIFBdQXofQM+k(_Bxg6uXNo~( iSIf6f*1V|
z*1pp(w9STQW*S&A*tS6Z+f!1|ZB ;dXT)F(q@TpZf_o|E9YXJb=n&L(8hB+8R?tG$=lT<1y#U5J
zCJFIz7&fQ~Hj>b_@&UhO^d7Xy{~_-HreKl{^_K^tM-AmACd^gfw1_bq^W0%TtmIr?
z=zCwIj`BljZ@@>ul0ak=W*Q~V$RY_=CjZ3*f5KFfW|CmANQ+Cw#!+1Oa*YI};?rrb
zr55^NxjCr5TU%6K3k~J=3PVrOL9;Wag;zTeOB `&$
zsoKV6^eOw-+^NrS_THvarmj?8T1$j;FK
fqI0AAqO4R7}KVE}1m
z6Td?iPx8_Pma^;LqITpA8x!w
mEA=_JdFKXz{($kN`Hk(3N>84j>{&EonR9tsbY(t?`lJ5^QM
z_~43qkO9;oN?(=-Eb%C#HTZX-Rg-kd1r2>%Gma0^9)-+D6jgB?TwL%;jrhG`LIHSc
z_PglengE-G$_Ip2uV@;f6vh)H@oSnkr}KJKwcemT{4xJt3qF)YK5JJyL!*$I@jE~T
zSand#?1OfQ5#R~#U0DX*2a#ypy`vDc0wo7*0Dt~DGxPfN{55A`%|148Abm5lj!xc6
z*8>`IuTu($(IBS&oRlO3Bp>9LDbN>izo7}VMfC;BU!Oooa9=b9;3N@`|M~ON!xUIL
zQj_0WfZ(;XtZZ;
Wm=LQ6hZs30j(lwZ0Lu-D_#q@XN;l(%Sm2+{;7S@VY=9xlp-u!xr2
z@W+oI?`rE{hxBeJ1lK{mNJ%ctGHGY6pwQbI|qmBdhlL#4v>6TEk2SeQk@*U&}=IeqB
zHwbbANiT>fuV5_%liWx-<41w@LkBr5d!q*`VKvY#Ae%<*llAhzA27BV@;9gziD=!6
z;m<%gNJFQvHoHUGOwM|4Y94P1bgGI_1`W_Y`tb%2SdnCDkK%}PkHOP~E?e&ov*%%W
zuFITkVG$7qGdn9!S}t&?zR>eJFNtGYs0AceIbcacSVLw1LjDxmC$KR%#KnhoM@C)J
zFKgfWl{&qZn?U^YNDotjcTCY7otq%h-OVO0ZX$T=T`M^o-d*SGzt@^SVL$iTdmWo(
zn&ZcGgm)cb{H#w;vZS%Pw3a#Gat^hnxOnfeP;v;4<=O3yS4NTHFF9cx4ym2uy5?#y
zBc+0#=cU>8uxqbH9ik$(TgQ67eN%=GnOi5dkOl{5og(b~>0R%{v}^FoySt5b?gEy;
z-@!V4jTD}<$uP&feLeV5(b;i7DqlAPjXy>6(NRkT@!K#XB`bAHolRZmDfl@}(m&3#
zAlp9FH!=W#YARR48%_N68`5xF8J+6`0NT9+yB`sEpuNw@jxjjSh2>f#2VjgI4bAtS
zxy?cgCc9D>IPVs|seYP+^oek^)i(s;$0O-