Loop using API not saving full particle data in request

Sophia Nasr
  • 2
  • 27 May '20

UPDATE: I found my problem with using zip, so I'm working on it to see if I can fix it. Will update here if the files save successfully so you may delete this. But I'm leaving the stuff below just in case something still isn't working. Thank you!

Hello,

I'm having trouble figuring out why my code fails to save all particle fields I requested. I check with the catalog for these subhalos, and see that they have a lot more particle data than is being saved (all of the subhalos I found should have at least 800 dark matter particles as I set a minimum total dm mass of 5e9 M_sun). The first couple of subhalos with IDs I put through the loop save fine (they have 1000-3000 KB), but then the files become anywhere between 1-200 KB (a lot are around 5 KB) which is obviously too small. I figured it out when I tried to create profiles in a loop and the loop broke, so I checked the points in that file and found it only saved ~100 dm particles when the catalog says that one has ~10000 dm particles. Is there something wrong with doing this in a loop? Here's my code:


def save_part_data(simRun,snapval,subidval,particle_type):

    baseUrl = 'http://www.tng-project.org/api/'
    headers = {"api-key":"my_key"}

    def get(path, params=None):
        # make HTTP GET request to path
        r = requests.get(path, params=params, headers=headers)

        # raise exception if response code is not HTTP SUCCESS (200)
        r.raise_for_status()

        if r.headers['content-type'] == 'application/json':
            return r.json() # parse json responses automatically

        if 'content-disposition' in r.headers:
            filename = r.headers['content-disposition'].split("filename=")[1]
            with open(filename, 'wb') as f:
                f.write(r.content)
            return filename # return the filename string

        return r

#     url = sim['snapshots'] + "z=0.2/"
#     print(url)

#     snap = get(url)
#     snap['number'], snap['redshift']

    sub_prog_url = "http://www.tng-project.org/api/"+simRun+"/snapshots/"+str(snapval)+"/subhalos/"+str(subidval)+"/"
    sub_prog = get(sub_prog_url)
#     print(sub_prog['pos_x'], sub_prog['pos_y'])

    cutout_request = {'dm':'Coordinates','stars':'Coordinates,Masses'}
    cutout = get(sub_prog_url+"cutout.hdf5", cutout_request)

    redshift = snap['redshift']
    scale_factor = 1.0 / (1+redshift)
    little_h = 0.6774

    subid=[]
    pos_x=[]
    pos_y=[]
    pos_z=[]
    dx=[]
    dy=[]
    dz=[]

    dm_pos_x=[]
    dm_pos_y=[]
    dm_pos_z=[]
    dm_dx=[]
    dm_dy=[]
    dm_dz=[]

    mass_stars=[]

    pos_x.append(sub_prog['pos_x'])
    pos_y.append(sub_prog['pos_y'])
    pos_z.append(sub_prog['pos_z'])

    if particle_type == 'stars':
        with h5py.File(cutout) as f:
            try:
                dx.append(f['PartType4']['Coordinates'][:,0] - sub_prog['pos_x'])
                dy.append(f['PartType4']['Coordinates'][:,1] - sub_prog['pos_y'])
                dz.append(f['PartType4']['Coordinates'][:,2] - sub_prog['pos_z'])
                dx=dx[0]
                dy=dy[0]
                dz=dz[0]

                dm_dx.append(f['PartType1']['Coordinates'][:,0] - sub_prog['pos_x'])
                dm_dy.append(f['PartType1']['Coordinates'][:,1] - sub_prog['pos_y'])
                dm_dz.append(f['PartType1']['Coordinates'][:,2] - sub_prog['pos_z'])
                dm_dx=dm_dx[0]
                dm_dy=dm_dy[0]
                dm_dz=dm_dz[0]

                rdm = np.sqrt(dm_dx**2 + dm_dy**2 + dm_dz**2)
                rdm *= scale_factor/little_h # ckpc/h -> physical kpc
                dmrsort=np.sort(rdm)

                rstars = np.sqrt(dx**2 + dy**2 + dz**2)
                rstars *= scale_factor/little_h # ckpc/h -> physical kpc
                rpos=np.array(np.argsort(rstars))
                rsort=np.sort(rstars)

                mass_stars.append(f['PartType4']['Masses'][:])
                mass_stars=mass_stars[0]
                mass_stars *= 1e10/little_h
                mass_stars_sort=mass_stars[rpos]

                allvals=np.array(list(zip(dm_dx,dm_dy,dm_dz,rdm,dmrsort,dx,dy,dz,rstars,rsort,mass_stars,mass_stars_sort)),\
                                 dtype=dt_subpart_data)
                allvalsrec=allvals.view(np.recarray)
                baseURL='C:/Users/phara/Python/saved_part_data/'
                filenamepkl = baseURL+'particledata_subhalo_'+str(subidval)
                outfilepkl = open(filenamepkl,'wb')
                pickle.dump(allvalsrec,outfilepkl)
                outfilepkl.close() 
                print("subhalo id =",subidval)
                return allvalsrec

            except KeyError:
                pass

    elif particle_type == 'no_stars':
        with h5py.File(cutout) as f:
            try:
                dx.append(f['PartType4']['Coordinates'][:,0] - sub_prog['pos_x'])

            except KeyError:
                dm_dx.append(f['PartType1']['Coordinates'][:,0] - sub_prog['pos_x'])
                dm_dy.append(f['PartType1']['Coordinates'][:,1] - sub_prog['pos_y'])
                dm_dz.append(f['PartType1']['Coordinates'][:,2] - sub_prog['pos_z'])
                dm_dx=dm_dx[0]
                dm_dy=dm_dy[0]
                dm_dz=dm_dz[0]

                rdm = np.sqrt(dm_dx**2 + dm_dy**2 + dm_dz**2)
                rdm *= scale_factor/little_h # ckpc/h -> physical kpc
                dmrsort=np.sort(rdm)

                allvals=np.array(list(zip(dm_dx,dm_dy,dm_dz,rdm,dmrsort)),dtype=dt_subpart_data_nostar)
                allvalsrec=allvals.view(np.recarray)
                baseURL='C:/Users/phara/Python/saved_part_data/'
                filenamepkl = baseURL+'particledata_subhalo_DMONLY_'+str(subidval)
                outfilepkl = open(filenamepkl,'wb')
                pickle.dump(allvalsrec,outfilepkl)
                outfilepkl.close()
                print("subhalo id DM ONLY =",subidval)
                return allvalsrec

Just to explain the try/except KeyError code: some of these subhalos don't have any stars in them, so my code breaks since there's no key PartType4. So if a key error exception comes up, I tell my code to pass IF I'm looking to save the ones with stars. If I want to save the ones with no stars, I use the try/except function in the way that it just tries to append a star field to an empty list, which will bring up the error and send it to the next line of code under except where I have it save only dm data.

Is there something wrong with using the API in a loop, or is it my code? Thank you!

Dylan Nelson
  • 28 May '20

Hi Sophia,

For simplicity I would separate out downloading/saving, and everything else, as

from os import rename

def get(path, params=None):
    # make HTTP GET request to path
    r = requests.get(path, params=params, headers=headers)

    # raise exception if response code is not HTTP SUCCESS (200)
    r.raise_for_status()

    if r.headers['content-type'] == 'application/json':
        return r.json() # parse json responses automatically

    if 'content-disposition' in r.headers:
        filename = r.headers['content-disposition'].split("filename=")[1]
        with open(filename, 'wb') as f:
            f.write(r.content)
        return filename # return the filename string

    return r

def save_part_data(simRun,snapval,subidval):
    """ Download and save particle cutout for one subhalo. """
    baseUrl = 'http://www.tng-project.org/api/'
    headers = {"api-key":"my_key"}

    sub_prog_url = "http://www.tng-project.org/api/"+simRun+"/snapshots/"+str(snapval)+"/subhalos/"+str(subidval)+"/"
    sub_prog = get(sub_prog_url)

    cutout_request = {'dm':'Coordinates','stars':'Coordinates,Masses'}
    cutout = get(sub_prog_url+"cutout.hdf5", cutout_request)

    final_filename = "cutout_snap=%d_sub=%d.hdf5" % (snapval,subidval)
    rename(cutout, final_filename)

def analyze_part_data(snapval,subidval):
    """ Analyze an already downloaded cutout. """
    filename = "cutout_snap=%d_sub=%d.hdf5" % (snapval,subidval)

    data = {}

    with h5py.File(filename,'r') as f:
        for groupname in ['PartType1','PartType4']:

            if groupname not in f:
                continue
            data[groupname] = {}

            for key in f[groupname]:
                data[groupname][key] = f[key][()

    # now compute rdm, rstars, and do any other analysis of interest
    dm_pos = data['PartType1']['Coordinates']
    # ...

def run():
    simRun = "TNG100-1"
    snap = 99
    subid = 123456

    save_part_data(simRun,snap,subid)
    analyze_part_data(simRun,snap,subid)
Sophia Nasr
  • 29 May '20

Hi Dylan, this is so much neater! Thank you! =)

  • Page 1 of 1