afdsew.py
#!/usr/local/bin/python
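"""Fetch the NWS Seattle Area Forecast Discussion, archive each new product
under raw_SEW/, and render the archive as per-entry markdown plus an Atom
feed under SEW/."""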
import click
import logging
import glob
import fnmatch
import re
import string
import subprocess
import os
from os import path
import shutil  # used by the commented-out cleanup in setup_afd_feed
import hashlib
from bisect import bisect_right
from cStringIO import StringIO

import arrow
import markdown
import feedgen.feed as feed

logging.basicConfig(level=logging.INFO, format="%(asctime)-15s %(message)s")

script_root_dir = path.dirname(path.abspath(__file__))
def format_afd_content(content):
    # Unwrap hard-wrapped paragraphs: a lone newline is a wrapped line,
    # a blank line separates paragraphs.
    content = re.sub(r"(?<!\n)\n(?!\n)", " ", content)
    # Sentence-case each paragraph of the all-caps NWS text, e.g.
    # "RAIN LIKELY. BREEZY." -> "Rain likely. Breezy."
    return "\n".join(string.capwords(line, ". ") for line in content.splitlines()) + "\n"
def parse_afd_time(time_string):
    # e.g. "239 PM PDT Wed Aug 4 2021"
    return arrow.get(time_string, "hmm A ZZZ ddd MMM D YYYY").datetime

def pformat_time(timestamp):
    return timestamp.strftime("%I:%M %p %A %B %d")
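# parse_afd expects the raw product layout that the regexes below encode;
# roughly (an illustrative sketch, not a verbatim product):
#
#   Area Forecast Discussion
#   National Weather Service Seattle WA
#   239 PM PDT Wed Aug 4 2021
#
#   .SYNOPSIS...A weak front brushes the area tonight...
#
#   &&
#
#   .SHORT TERM...
#   ...
#   &&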
def parse_afd(afd):
    # Unfold continuation lines (a newline followed by a space).
    afd = "\n".join(afd.split("\n "))
    # Find all headers in afd and all potential section endpoints
    headers = {h.group(1): h.span()
               for h in re.finditer(r"^\.([^.]*)\.\.\.", afd, re.MULTILINE)}
    endpoints = sorted(set(
        [endmark.start() for endmark in re.finditer(r"\n&&\n", afd, re.MULTILINE)]
        + [s for s, e in headers.values()]))
    # Find closest endpoint for each header's content section and get content
    header_result_spans = {
        h: (content_start, endpoints[bisect_right(endpoints, content_start)])
        for h, (header_start, content_start) in headers.items()}
    afd_data = {h: afd[start:end].strip()
                for h, (start, end) in header_result_spans.items()}
    rs = re.search("Area Forecast Discussion\nNational Weather Service Seattle WA\n(.*)\n", afd)
    afd_data["TIME"] = rs.group(1)
    return {"timestamp": parse_afd_time(afd_data["TIME"]),
            "content": afd_data}
def format_afd(afd):
    afd_sections = afd["content"]
    meta_sections = ["TIME", "SYNOPSIS"]
    main_section_names = ["SHORT TERM", "LONG TERM"]
    main_sections = []
    for n in main_section_names:
        main_sections.extend(fnmatch.filter(afd_sections.keys(), n + "*"))
    formatted_AFD = StringIO()
    title = pformat_time(afd["timestamp"])
    formatted_AFD.write(title + "\n")
    formatted_AFD.write("=" * len(title) + "\n")
    # Some products carry an UPDATE section instead of a SYNOPSIS.
    synopsis_raw = afd_sections.get("SYNOPSIS") or afd_sections.get("UPDATE") or ""
    formatted_AFD.write(format_afd_content(synopsis_raw) + "\n")
    for h in main_sections:
        formatted_AFD.write(h + "\n")
        formatted_AFD.write("-" * len(h) + "\n")
        formatted_AFD.write(format_afd_content(afd_sections[h]))
        formatted_AFD.write("\n")
    # Everything that is neither a main nor a meta section, in no fixed order.
    for h in set(afd_sections.keys()).difference(set(main_sections + meta_sections)):
        formatted_AFD.write(h + "\n")
        formatted_AFD.write("-" * len(h) + "\n")
        formatted_AFD.write(format_afd_content(afd_sections[h]))
        formatted_AFD.write("\n")
    return formatted_AFD.getvalue()
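# format_afd renders an entry roughly as:
#
#   02:39 PM Wednesday August 04
#   ============================
#   <synopsis text>
#
#   SHORT TERM
#   ----------
#   <section text>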
def setup_afd_feed(result_dir, afd_entries):
    # TODO: Generate index.html from md stored in raw_SEW instead
    #
    # if path.exists(result_dir):
    #     logging.info("Removing existing root: %s", result_dir)
    #     shutil.rmtree(result_dir)
    # os.makedirs(result_dir)
    afd_feed = feed.FeedGenerator()
    afd_feed.title("NWS Seattle Area Forecast Discussion")
    afd_feed.link(href="https://afd.fontkeming.fail/SEW/current.md", rel="self")
    afd_feed.id('https://afd.fontkeming.fail')
    afd_feed.author(name="Ryan Rix", email="ry@n.rix.si")
    afd_feed.description("NWS Seattle Area Forecast Discussion")
    current = None
    current_md = None
    # Entries are processed oldest-to-newest, so the last assignment to
    # current/current_md below is the newest entry.
    for afd_entry in sorted(afd_entries, key=lambda e: e["timestamp"]):
        eid = afd_entry["timestamp"].strftime("%y-%m-%d-%H%M")
        entry_md = format_afd(afd_entry)
        logging.debug("Rendered entry md:\n%s", entry_md)
        entry_md_file = path.join(result_dir, eid + ".md")
        logging.info("Writing entry file: %s", entry_md_file)
        with open(entry_md_file, "w") as md_out:
            md_out.write(entry_md)
        item = afd_feed.add_entry()
        md = markdown.markdown(entry_md)
        current = eid
        current_md = md
        afd_feed.updated(afd_entry["timestamp"])
        item.title(pformat_time(afd_entry["timestamp"]))
        item.link(href=(eid + ".md"))
        item.description(md)
        item.pubDate(afd_entry["timestamp"])
        item.updated(afd_entry["timestamp"])
        item.guid(eid)
        item.id(eid)
    logging.info("Writing current: %s", current)
    with open(path.join(result_dir, "current.md"), "w") as md_out:
        md_out.write(current_md)
    logging.info("Rendering feed file: %s", path.join(result_dir, "AFDSEW.xml"))
    afd_feed.atom_file(path.join(result_dir, "AFDSEW.xml"))
    return result_dir
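# On-disk layout under --prefix:
#   raw_SEW/AFDSEW.<epoch>   raw products archived by fetch()
#   SEW/<yy-mm-dd-HHMM>.md   rendered entries
#   SEW/current.md           the newest rendered entry
#   SEW/AFDSEW.xml           the Atom feed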
def fetch_feed_files(search_prefix):
    return glob.glob(path.join(search_prefix, "raw_SEW", "AFDSEW.*"))

def is_new_afd(text, search_prefix):
    # Compare against every archived product; identical text means this
    # AFD has already been fetched.
    new_hash = hashlib.sha224(text).hexdigest()
    for fname in fetch_feed_files(search_prefix):
        with open(fname, "r") as f:
            if hashlib.sha224(f.read()).hexdigest() == new_hash:
                return False
    return True
@click.command()
@click.option("--prefix", default=script_root_dir)
def doit(prefix):
    fetch(prefix)
    generate(prefix)

def generate(prefix):
    logging.info("Generating")
    feed_files = fetch_feed_files(prefix)
    afd_entries = [parse_afd(open(s).read()) for s in feed_files]
    logging.info("Parsed %s entries.", len(afd_entries))
    setup_afd_feed(path.join(prefix, "SEW"), afd_entries)

def fetch(prefix):
    logging.info("Fetching")
    url = ("https://www.wrh.noaa.gov/total_forecast/getprod.php?"
           "new&wfo=sew&sid=SEW&pil=AFD&toggle=textonly")
    # Run curl without a shell; -s suppresses the progress meter.
    rsp = subprocess.check_output(["curl", "-s", url])
    afd = parse_afd(rsp)
    ts = afd.get("timestamp")
    # %s (seconds since the epoch) is a platform extension to strftime.
    suffix = ts.strftime("%s")
    if is_new_afd(rsp, prefix):
        with open(path.join(prefix, "raw_SEW", "AFDSEW." + suffix), "w") as f:
            f.write(rsp)
    logging.info("Done")

if __name__ == "__main__":
    doit()
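# Typical invocation, e.g. from cron (the prefix path is illustrative):
#   ./afdsew.py --prefix /srv/afd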