#!/var/www/py3/bin/python

import cgi
import cgitb
import html
import os
import re
import sys
from datetime import datetime
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

#cgitb.enable()
# Set to True to emit extra diagnostic output while the page is generated.
DEBUG = False
# Requested year range; both stay 0 until form values are read.
start_year = end_year = 0

# TODO: cache the Wikipedia data and re-fetch only when it is newer.
# TODO: compute the total (pre-submission) count in real time instead of hardcoding it.
#if(len(sys.argv) >= 2):
#  start_year = sys.argv[1]
#if (len(sys.argv) == 3):
#  end_year = sys.argv[2]

# CGI response header: the Content-Type line followed by the blank line that
# separates the headers from the document body.
print("Content-Type: text/html")
print()

# Static top of the page: metadata, inline CSS, the Bootstrap stylesheet, the
# banner image and the introductory text. <body>, the "container" <div> and
# the last <div> are left open here; the rest of the script prints the results
# table and the search form after them.
print("""<html>
<head>
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
      <meta name="author" content="scottvr"/>
      <meta name="description" content="display the composition and changes of the US Supreme Court for any time period">
      <meta name="keywords" content="Supreme Court, composition, how many, who, eras"/>
<style>
tbody td {
  padding: var(--cap);
}
tbody tr:nth-child(odd) {
  background-color: var(--bg, #dadbdc);
  color: #000;
}
tbody tr:nth-child(even) {
  background-color: var(--bg, #ecebe5);
  color: #000;
}
.smalltr {
  padding: 0;
}
#rotate {
    display: none;
}

@media screen and (max-width: 640px) {
    #rotate {
        background-color: rgba(0,0,0,0.5);
        display: block;
        height: 100%;
        position: absolute;
        top: 0;
        width: 100%;
    }
}
#info {display: none;}

</style>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.5.3/dist/css/bootstrap.min.css"
            integrity="sha384-TX8t27EcRE3e/ihU7zmQxVncDAy5uIKz4rEkgIXeMed4M0jlfIDPvg6uqKI2xXr2" crossorigin="anonymous">
</head>
<body>
      <div class="container">
      <div>
        <img class="img-fluid" style="width: 100%; height: auto; display: block;" src="https://www.killsignal.net/armatronbig3.svg"></div>
        <br>
        <div>
        <center>
        <h2><code class="text-warning">How many "versions" of the Supreme Court have there been?</code></h2>
        </center>
        </div>
        <br>
          <div>Just something silly to have spent a few hours creating. The impetus was that all of the charts, graphs, and tables I found on wikipedia and supremecourt.gov that show the composition of the US Supreme Court throughout history, do not provide a straightforward way to know how many different "compositions" (I'd say "era", but that has an established meaning as being the time period for which a particular chief justice was in office) the court has had over the years. (That is, how many different groups of appointed-for-life justices have existed, not just how many eras of individual Chief Justices have existed, which is easy enough to find.) Plus, it gave me a chance to write something using pandas dataframes. As is obvious, "scope creep" plays a role and now it displays other information in other ways and the data I wanted that drove me to write a small program is pushed to the very bottom of the screen. :-)</div>
          <br>
          <div><strike>Note that it may take a while to complete execution due to literally iterating through every day from start date to end date (up to some 86k days). Probably not optimal.</strike> It now only iterates through each justice's days in office, checking for other justices in office those days. If it's not optimal, I'm not concerned; execution time for all the court's history went from 3 minutes to &lt; 3 seconds after making this change. Upon completion the output will be a table showing each year in the range requested and the judges on the bench at the time <strike>(small rows containing just a year indicate a new year is starting and if there are several in a row it means the composition didn't change from year to year.)</strike> I also did away with that (by oversight) when optimizing for speed.</div><br>
          <div> Oh! The number I was originally looking to find - the entire reason I started working on this problem - will be shown at the bottom of the page, after the table. <b>If you just came to know that answer from the first justice to today: as of this writing there have been 211 different groups of judges comprising the US Supreme Court.</b> If you select any sort of date range, the total for that period will be displayed.</div>
      <BR>
        <div>

""")


def parse_year(raw):
    """Convert a submitted year field into an int, or 0 when it is unusable.

    Args:
        raw: The value returned by ``form.getvalue`` for a year field: a
            string, ``None`` when the field is absent, or a list when the
            field was submitted more than once.

    Returns:
        The year as an int when it parses and lies strictly between 0 and
        9999; otherwise 0, which the rest of the script treats as "no year
        given".
    """
    try:
        year = int(raw)
    except (TypeError, ValueError):
        # Missing, repeated, blank or non-numeric input: fall back to "unset"
        # instead of aborting the request with a traceback.
        return 0
    return year if 0 < year < 9999 else 0


form = cgi.FieldStorage()

start_year = parse_year(form.getvalue("start_year"))
end_year = parse_year(form.getvalue("end_year"))

# A GET request always covers the whole history, whatever the query string
# says. .get() keeps the script usable when REQUEST_METHOD is not set at all
# (for example when it is run from a shell).
if os.environ.get("REQUEST_METHOD") == "GET":
    start_year = end_year = 0
  
def load_justices(url):
    """Download the justices table from ``url`` and return it as a DataFrame.

    The tenure columns are renamed to "Start" and "End" and parsed into
    datetimes. Rows whose end date is the literal text "Incumbent" get
    today's date as "End" and are flagged in a boolean "Incumbent" column,
    so callers can tell that placeholder apart from a real departure.

    Prints a message and exits the script when the page cannot be fetched,
    the table is not found, or a date cannot be parsed.
    """
    date_format = "%B %d, %Y"

    try:
        # A timeout keeps a stalled request from hanging the CGI process.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"Failed to fetch data from en.wikipedia.org: {html.escape(str(e))}")
        sys.exit()
    if response.status_code != 200:
        print(f"Failed with {response.status_code} from en.wikipedia.org")
        sys.exit()
    if DEBUG:
        print("HTTP 200 OK!")
        print("Parsing html from wikipedia. Extracting justices table.")

    soup = BeautifulSoup(response.text, 'html.parser')
    justice_table = soup.find('table', {'id': "justices"})
    if justice_table is None:
        print("Could not find the justices table in the page from en.wikipedia.org")
        sys.exit()

    if DEBUG:
        print("Creating DataFrame")
    # read_html is given a file-like object; passing the markup as a plain
    # string is deprecated in recent pandas releases.
    frame = pd.read_html(StringIO(str(justice_table)))[0]
    if DEBUG:
        print(str(frame.head()).encode('utf-8').strip())
        print("Dropping unwanted columns from DataFrame and renaming remaining.")
    frame = frame.rename(columns={"Start date[3]": "Start", "End date[3]": "End"})

    # Remember who is still in office before the marker text is overwritten.
    incumbent = frame["End"] == "Incumbent"
    # Give incumbents today's date, written in the same format that is parsed
    # below (a plain %d is portable, unlike platform-specific flags).
    today = datetime.now().strftime(date_format)
    frame["End"] = frame["End"].replace("Incumbent", today)

    try:
        # exact=False lets trailing text after the date (e.g. footnote
        # markers) through.
        for column in ("Start", "End"):
            frame[column] = pd.to_datetime(
                frame[column], format=date_format, errors="raise", exact=False
            )
    except ValueError as e:
        print('to_datetime error (parsing "unparseable" given explicit format):\n\t'
              + html.escape(str(e)))
        sys.exit()
    frame["Incumbent"] = incumbent

    if DEBUG:
        print(str(frame.head()).encode('utf-8').strip())
    return frame


def iter_new_compositions(justice_df, start_date, end_date):
    """Yield ``(date, members)`` for each distinct court seen in the range.

    ``members`` is a sorted tuple of ``justice_df`` index labels. The court in
    office on ``start_date`` is reported first; after that the function walks
    every tenure start/end date within ``[start_date, end_date]`` and reports
    a composition the first time it appears. A justice counts as gone on the
    day of their "End" date. Incumbents never leave: their "End" is only a
    placeholder. Empty compositions are never reported.
    """
    if start_date > end_date:
        return

    starts = justice_df["Start"]
    # Real departures only; an incumbent's "End" is just today's date.
    departures = justice_df.loc[~justice_df["Incumbent"], "End"]

    still_serving = justice_df["Incumbent"] | (justice_df["End"] > start_date)
    serving = set(justice_df[(starts <= start_date) & still_serving].index.tolist())

    seen = set()
    members = tuple(sorted(serving))
    if members:
        seen.add(members)
        yield start_date, members

    # Filter before sorting so unparsed (NaT) dates never reach the sort.
    event_dates = sorted(
        d for d in set(starts.tolist() + departures.tolist())
        if start_date <= d <= end_date
    )
    for date in event_dates:
        leaving = departures[departures == date].index.tolist()
        arriving = starts[starts == date].index.tolist()
        serving = (serving - set(leaving)) | set(arriving)
        members = tuple(sorted(serving))
        if members and members not in seen:
            seen.add(members)
            yield date, members


if form.getvalue("run"):
    if DEBUG:
        print("<pre>")
    # Only affects how DataFrames are rendered in the DEBUG output.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)

    wikiurl = "https://en.wikipedia.org/wiki/List_of_United_States_Supreme_Court_justices_by_time_in_office"
    justice_df = load_justices(wikiurl)

    first_start = justice_df["Start"].min()
    last_end = justice_df["End"].max()

    # Default to the full history and narrow it only for a usable year. The
    # requested years are clamped to one year outside the data, so an
    # out-of-range request gives an empty result rather than a timestamp
    # error or an undefined boundary.
    start_date = first_start
    if start_year > first_start.year:
        start_date = pd.Timestamp(
            year=min(start_year, last_end.year + 1), month=1, day=1
        )
    end_date = last_end
    if 0 < end_year < last_end.year:
        end_date = pd.Timestamp(
            year=max(end_year, first_start.year - 1), month=12, day=31
        )

    if DEBUG:
        if start_year > 0:
            print(f"start year specified... {start_year}")
        print(f"end year specified... {end_year}")
        print(f"start date: {start_date}")
        print(f"end date: {end_date}")
        # Close the <pre> only if it was opened above.
        print("</pre>")

    print("<table><tbody>")

    # One table row per composition, in the order they first appear.
    unique_combinations = set()
    for date, members in iter_new_compositions(justice_df, start_date, end_date):
        names = justice_df.loc[list(members), 'Justice']
        # The names come from a remote page, so escape them for HTML.
        names_html = ", ".join(html.escape(str(name)) for name in names)
        print(f'<tr><td style="--cap: 30px;">{str(date)}</td>')
        print(f'<td style="--cap: 30px;">{names_html}</td></tr>')
        unique_combinations.add(members)

    # Print the number of unique combinations of justices. The document is
    # not closed here because the search form is printed after this block.
    print(f'</tbody></table><hr><b><pre>Unique Supreme Court compositions during the period {start_date} - {end_date}: {len(unique_combinations)}</pre></b>')
  
# Using HTML input and forms method
# The search form posts back to this same script. The hidden "run" field is
# what the script checks to decide whether to build the results table.
print('<form method="post">')
print('<div class="form-group">')
print("""<table style="border-collapse: separate; border-spacing: 50px 0;"><tbody>
      <tr style="--bg: #FFF; --pad: 10px;">
      <td style="padding: 10px 0; background-color: #FFF;">Start search in year:</td><td style="background-color: #FFF; padding: 10px 0;"><input type='text' name='start_year' /></td><td style="background-color: #FFF; padding: 10px 0;" >(leave blank to start at the beginning)</td>
      </tr>
      """)
print("""
      <tr style="--bg: #FFF; --pad: 10px;">
<td style="background-color: #FFF; padding: 10px 0; ">End at year: </td><td style="background-color: #FFF; padding: 10px 0; "><input type='text' name='end_year'></td><td style="background-color: #FFF; padding: 10px 0; ">(leave blank to go all the way to the present day lineup)</td>
      </tr>
      """)
# The hidden input sits inside the cell; an <input> is not valid directly
# under <tr>.
print('<tr style="--bg: #FFF"><td><br><input type="hidden" name="run" value="run"/></td></tr>')
print("""
<tr style="--pad: 10px; --bg: #FFF"><td><button type="submit" class="btn btn-lg btn-outline-warning">Go!</button></td>
<td style="background-color: #FFF; padding: 10px 0;" colspan=2><i>note that the greater the timespan, the longer it will take to calculate. For the entire history of the court (both fields blank) it should complete in 1-3 <strike>minutes</strike>seconds.</i></td>
      </tr></tbody></table>
      """)
print("</div>")
print("</form>")
# Close the wrapper elements and the document opened by the page header.
print("""</div>
</div>
</body>
</html>""")

