<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Parse-program-info-and-visit-timeline-webpages" data-toc-modified-id="Parse-program-info-and-visit-timeline-webpages-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Parse program info and visit timeline webpages</a></span></li></ul></div>

# Parse program info and visit timeline webpages



In [296]:
import os
import numpy as np
import matplotlib.pyplot as plt
import requests


In [434]:
# https://stackoverflow.com/questions/24124643/parse-xml-from-url-into-python-object
import traceback
import urllib3
import xmltodict

def getxml(url="https://yoursite/your.xml"):
    """
    Download ``url`` and parse the response body as XML.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.

    Returns
    -------
    data : dict
        Result of `xmltodict.parse` on the response body, or an empty dict
        if the body could not be parsed (the traceback is printed).
    """
    http = urllib3.PoolManager()

    response = http.request('GET', url)
    try:
        data = xmltodict.parse(response.data)
    except Exception:
        # Best effort: report the problem and hand back an empty result.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate so a long scrape can still be interrupted.
        print("Failed to parse xml from response (%s)" % traceback.format_exc())
        data = {}

    return data

def visit_xml(proposal_id=1324):
    """
    Fetch the visit status report of a JWST program.

    Parameters
    ----------
    proposal_id : int
        Program number inserted into the STScI ``get-visit-status`` query.

    Returns
    -------
    report : dict
        The contents of the ``'visitStatusReport'`` entry when the parsed
        document has one, otherwise the parsed document as returned by
        `getxml`.
    """
    report = getxml(
        url=f"https://www.stsci.edu/cgi-bin/get-visit-status?id={proposal_id}&markupFormat=xml&observatory=JWST"
    )
    return report['visitStatusReport'] if 'visitStatusReport' in report else report

def prop_html(proposal_id=1324):
    """
    Download the proposal information page of a JWST program.

    Parameters
    ----------
    proposal_id : int
        Program number inserted into the STScI ``get-proposal-info`` query.

    Returns
    -------
    soup : bs4.BeautifulSoup
        The page parsed with the ``'html.parser'`` backend.
    """
    from bs4 import BeautifulSoup

    page = requests.get(
        f"https://www.stsci.edu/cgi-bin/get-proposal-info?id={proposal_id}&observatory=JWST"
    )
    return BeautifulSoup(page.text, 'html.parser')

def program_info(proposal_id=1324):
    """
    Collect the program summary and visit list of a JWST program.

    Parameters
    ----------
    proposal_id : int
        Program number passed to `prop_html` and `visit_xml`.

    Returns
    -------
    meta : dict
        Keys: ``'proposal_id'``, ``'raw'`` (the parsed proposal page),
        ``'pi'``, ``'title'``, ``'cycle'`` (int), ``'allocation'`` (float),
        ``'proptime'`` (float), ``'type'`` and ``'visit'`` (always a list of
        visit entries, empty if the status report has no ``'visit'`` key).

    Raises
    ------
    IndexError, TypeError, ValueError
        If the proposal page does not have the expected layout; the fields
        are extracted purely by position.
    """
    soup = prop_html(proposal_id=proposal_id)
    meta = {'proposal_id': proposal_id, 'raw': soup}

    # The page has no ids/classes to key on, so fields are taken by position:
    # the first <p> carries the PI, the second <p> carries the rest.
    paragraphs = soup.find_all('p')
    summary = paragraphs[1].contents

    meta['pi'] = paragraphs[0].contents[1].strip()
    meta['title'] = summary[1].strip()
    meta['cycle'] = int(summary[5].strip())
    # Both values are followed by extra text in the page; keep only the number.
    meta['allocation'] = float(summary[9].strip().split()[0])
    meta['proptime'] = float(summary[-1].strip().split()[0])
    meta['type'] = soup.find_all('h1')[0].contents[1].contents[0]

    visits = visit_xml(proposal_id)
    try:
        visit = visits['visit']
    except KeyError:
        # No visit entries, e.g. the status report failed to parse and
        # ``getxml`` returned an empty dict.
        visit = []

    # A single visit comes back as one entry rather than a list; normalise so
    # callers can always iterate.
    meta['visit'] = visit if isinstance(visit, list) else [visit]

    return meta

# Try the scraper on a single program; this issues live HTTP requests to stsci.edu.
#visit = visit_xml(proposal_id=1324)
meta = program_info(proposal_id=1567)

In [462]:
from grizli import utils
import astropy.units as u

def show_window(v, meta):
    """
    Build the table row of one visit and draw its plan-window timeline strip.

    Parameters
    ----------
    v : dict
        One visit entry. Keys read: ``'@observation'``, ``'@visit'``,
        ``'target'``, ``'configuration'``, ``'hours'`` and, optionally,
        ``'planWindow'``.
    meta : dict
        Program metadata with the keys ``'type'``, ``'proposal_id'``,
        ``'title'``, ``'pi'``, ``'cycle'``, ``'allocation'``, ``'proptime'``.

    Returns
    -------
    row : list
        The seven program fields, the five visit fields, then window start,
        window end and an ``<img>`` tag pointing at the saved strip. Visits
        without a usable plan window get ``'2029-01-01', '-', '-'`` instead.
    fig : matplotlib.figure.Figure or None
        The timeline figure (also saved as
        ``jwst_{proposal_id}_{observation}_{visit}.png`` in the working
        directory), or None when there is no usable plan window.
    """
    row = [meta[k] for k in ['type','proposal_id','title', 'pi', 'cycle', 'allocation', 'proptime']]
    row += [v[k] for k in ['@observation', '@visit', 'target', 'configuration', 'hours']]

    # Placeholder for visits without a usable plan window
    no_window = ['2029-01-01','-','-']

    # The window is only usable when it is text containing the bracketed
    # "(2... - 2...)" form; a missing or empty element is treated as absent.
    window = v.get('planWindow')
    if not isinstance(window, str) or '(2' not in window:
        row.extend(no_window)
        return row, None

    # Imported here because the notebook only imports ``astropy.units``,
    # which does not bind the ``astropy`` name.
    from astropy.time import Time

    # Text inside the brackets, split into the start and end dates
    dates = ('2'+window.split('(2')[1].strip()).strip(')').split(' - ')

    # First word of the configuration is the instrument name
    inst = v['configuration'].strip().split()[0].lower()

    colors = {'niriss':utils.MPL_COLORS['b'],
              'nirspec':utils.MPL_COLORS['orange'],
              'miri':utils.MPL_COLORS['r'],
              'nircam':utils.MPL_COLORS['g']}
    # Fall back to black rather than failing on an unlisted instrument
    color = colors.get(inst, 'k')

    fig, ax = plt.subplots(1,1,figsize=(8,0.4))

    # Fixed x-range so that every strip shares the same time axis
    wlim = Time(['2022:150','2024:300'], format='yday')

    # Dates use '.' between year and day number; 'yday' expects ':'
    wdate = Time([d.replace('.',':') for d in dates], format='yday')
    ax.plot_date(wdate.plot_date, [0,0], color=color, lw=2, fmt='s-')

    prop = meta['proptime']

    if prop > 0:
        # Shift the window by the proprietary time (presumably in months --
        # TODO confirm) and draw the shifted window as a fainter dotted line
        pub = wdate + prop/12.*u.year
        ax.plot_date(wdate.plot_date, [0,0], fmt='o', zorder=100, color='w', markersize=4)
        ax.plot_date(pub.plot_date, [0,0], color=color, lw=2, linestyle=':', fmt='s-',
                     alpha=0.5)

    wgrid = Time(['2022-07-01', '2022-10-01',
                  '2023-01-01', '2023-04-01', '2023-07-01', '2023-10-01',
                  '2024-01-01', '2024-04-01', '2024-07-01', '2024-10-01'])

    # Entries 2 and 6 are the January 1st dates: mark the year boundaries
    ax.vlines(wgrid.plot_date[[2,6]], -1, 1, color='k', alpha=0.2)
    ax.set_ylim(-0.2, 0.2)

    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)

    ax.set_xticks(wgrid.plot_date)
    ax.set_xlim(*wlim.plot_date)

    ax.tick_params(length=0, which='both')

    ax.get_yaxis().set_visible(False)

    ax.set_xticklabels([])
    ax.set_yticklabels([])

    ax.grid()

    # Window start / end as calendar dates (time of day dropped)
    row += [t.iso.split()[0] for t in wdate]

    timeline = f"jwst_{meta['proposal_id']}_{v['@observation']}_{v['@visit']}"

    row.append(f'<img src="{timeline}.png" />')

    fig.savefig(timeline+'.png')

    return row, fig

In [465]:
# Program numbers to include in the timeline table; the trailing comments are
# short labels for the programs.
progs = [1837, # primer
         1345, # ceers
         1433, # coe
         1727, # cosmos
         2079, # ngdeep
         2426, # bagley
         2659, # beasts bubbles
         2561, # uncover
         1914, # aurora
         1895, # fresco
         1567, # seiji
         1324, # glass
         1208, # canucs
         1180, # eisenstein, gs1
         1286, # gs2
         1287, # gs3
         1181, # gn1
         1210, # ferruit
         1211, # ferruit NRS GTO
         1213, 
         1214, 
         1215, 
         1216,
         1217, 
         1262, 
         1263, # colina
         1284, # colina 2
         1264, # colina 3
         1283, # miri udf
         1243, # lilly
         1176, # windhorst
         1207, # rieke
         2282, # earendel
         1869, # LyC22
         1626, # stefanon
         1657, # harikane
         1671, # maseda
         1740, # harikane 2
         1747, # roberts-borsani
         1758, # finkelstein
         1791, # spilker
         1810, # belli
         1933, # mathee
         1963, # williams udf
         2110, # kriek
         2136, # tucker
         2279, # naidu
         2285, # carnall
         2345, # newman
         2362, # marsan
         2478, # stark
         2484, # mirka
         2516, # hodge
         2555, # sunburst
         2565, # glazebrook, 
         1908, # vanzella
         1764, # fan agn
         1964, # overzier
         1328, # armus
         1355, # rigby
         1199, # stiavelli
         2123, # kassin
         2130, # lee
         2198, # barrufet
         2234, # banados
         2321, # best
         2566, 
         2674,
         1871,
         2078, 
         1678, # ashby
         2107, 
         1717, 
         1554,
        ]

In [455]:
# Accumulators filled by the scraping loop. Running this cell resets them, so
# every program is fetched again on the next pass.
rows = []    # one table row per visit
done = []    # program numbers already processed
failed = []  # program numbers whose pages could not be parsed
res = {}     # program number -> metadata dict

In [466]:
# Output directory for the per-visit PNG strips and the HTML table. Set the
# JWST_LRP_DIR environment variable to override the original author's path.
os.chdir(os.environ.get('JWST_LRP_DIR', '/Users/gbrammer/Research/JWST/LRP'))

# Column names, in the order of the rows built by ``show_window``
names = ['type','proposal_id','title', 'pi', 'cycle', 'allocation', 'proptime',
         'observation', 'visit', 'target', 'configuration', 'hours',
         'win_start', 'win_end', 'timeline']

# To process a single program, set e.g. ``progs = [1963]`` before the loop.

for i, prog in enumerate(progs):
    print(i, prog)

    # Programs handled on an earlier pass are skipped, so the cell can be
    # re-run after a failure without duplicating rows.
    if prog in done:
        continue

    try:
        m = program_info(proposal_id=prog)
    except (IndexError, TypeError, KeyError):
        # The proposal page or visit report did not have the expected layout
        failed.append(prog)
        print('Fail')
        continue

    done.append(prog)
    res[prog] = m

    for v in m['visit']:
        row, fig = show_window(v, m)
        rows.append(row)

    # Each visit opens a figure; release them once the program is finished
    plt.close('all')
    

0 1837
1 1345
2 1433
3 1727
4 2079
5 2426
6 2659
7 2561
8 1914
9 1895
10 1567
11 1324
12 1208
13 1180
14 1286
15 1287
16 1181
17 1210
18 1211
19 1213
20 1214
21 1215
22 1216
23 1217
24 1262
25 1263
26 1284
27 1264
28 1283
29 1243
30 1176
31 1207
32 2282
33 1869
34 1626
35 1657
36 1671
37 1740
38 1747
39 1758
40 1791
41 1810
42 1933
43 1963
44 2110
45 2136
46 2279
47 2285
48 2345
49 2362
50 2478
51 2484
52 2516
53 2555
54 2565
55 1908
56 1764
57 1964
58 1328
59 1355
60 1199
61 2123
62 2130




63 2198




64 2234




65 2321




66 2566




67 2674




68 1871




69 2078




70 1678
71 2107




72 1717
73 1554




In [467]:
# Program numbers that raised during parsing in the loop above; an empty list means none did.
failed

[]

In [468]:
# ``getpass.getuser`` works without a controlling terminal, unlike
# ``os.getlogin``; ``Time`` is imported explicitly because the notebook only
# imports ``astropy.units``, which does not bind the ``astropy`` name.
import getpass
from astropy.time import Time

tab = utils.GTable(rows=rows, names=names)

# Link each program number to its proposal information page
tab['proposal_id'] = [f'<a href="https://www.stsci.edu/cgi-bin/get-proposal-info?id={p}&observatory=JWST">{p}</a>'
                      for p in tab['proposal_id']]

# Colour the proprietary time: red when non-zero, green otherwise
tab['proptime'] = [
    f'<p style="color:red;"> {p} </p>' if p > 0 else f'<p style="color:green;"> {p} </p>'
    for p in tab['proptime']
]

tab.rename_column('proposal_id','prog')
tab.rename_column('proptime','prop')
tab.rename_column('pi','PI')
tab.rename_column('title','Proposal_Title')

tab.write_sortable_html('timeline.html', localhost=False, max_lines=10000)

# Re-open the written page and add a provenance footer three lines before the
# end of the file.
with open('timeline.html') as fp:
    lines = fp.readlines()

lines.insert(-3, f'<p style="font-size:x-small;"> Generated at {Time.now().iso} by {getpass.getuser()}</p>\n')
with open('timeline.html','w') as fp:
    fp.writelines(lines)

In [469]:
# Number of visit rows in the table
len(tab)

998

In [461]:
# NOTE(review): stale duplicate -- this cell's execution count (461) predates the run that built the current table (468), so the count shown below is out of date.
len(tab)

776