From 2e82ad90811e77b40d710794483ce833d023de2f Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Wed, 22 Mar 2017 17:18:02 -0700 Subject: [PATCH 01/11] Adding HTTP checks to fix #16 --- rancher.py | 103 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 6 deletions(-) diff --git a/rancher.py b/rancher.py index 90e09bf..27e8c4c 100755 --- a/rancher.py +++ b/rancher.py @@ -12,6 +12,12 @@ from OpenSSL import crypto from requests.auth import HTTPBasicAuth +# how long (in seconds) we want to wait for HTTP request to complete before throwing an error +CONNECT_TIMEOUT = 0.5 + +# how long to back off connection time before trying request again (in seconds) +CONNECT_WAIT = 10 + try: RANCHER_URL = os.environ['CATTLE_URL'] RANCHER_ACCESS_KEY = os.environ['CATTLE_ACCESS_KEY'] @@ -41,10 +47,27 @@ def auth(self): def get_certificate(self): ''' - return json(python dict) of of certificate listing api endpoint + return json(python dict) of certificate listing api endpoint ''' url = "{0}/certificate".format(RANCHER_URL) - r = requests.get(url=url, auth=self.auth()) + # make sure we loop until we get valid data back from server + done = False + while not done: + try: + r = requests.get(url=url, auth=self.auth(), timeout=CONNECT_TIMEOUT) + except requests.exceptions.ConnectionError as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_certificate", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + except requests.exceptions.ConnectTimeout as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_certificate", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + # done with exceptions + # if we have a valid status code we should be ok + if(r.status_code): + done = True + return r.json()['data'] def get_issuer_for_certificates(self): @@ -104,7 +127,23 @@ def delete_cert(self, server): ''' print "Deleting {0} cert from Rancher API".format(server) url = "{0}/projects/{1]/certificates/{2}".format(RANCHER_URL, self.get_project_id(), self.get_certificate_id(server)) - r = requests.delete(url=url, auth=self.auth()) + done = False + while not done: + try: + r = requests.delete(url=url, auth=self.auth(), timeout=CONNECT_TIMEOUT) + except requests.exceptions.ConnectionError as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "delete_cert", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + except requests.exceptions.ConnectTimeout as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "delete_cert", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + # done with exceptions + # if we have a valid status code we should be ok + if(r.status_code): + done = True + print "Delete cert status code: {0}".format(r.status_code) print "Sleeping for two minutes because rancher sucks and takes FOREVER to purge a deleted certificate" time.sleep(120) @@ -270,7 +309,23 @@ def post_cert(self, server): json_structure['uuid'] = None headers = {'Content-Type': 'application/json'} - r = request_type(url=url, data=json.dumps(json_structure), headers=headers, auth=self.auth()) + done = False + while not done: + try: + r = request_type(url=url, data=json.dumps(json_structure), headers=headers, auth=self.auth(), timeout=CONNECT_TIMEOUT) + except requests.exceptions.ConnectionError as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "post_cert", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + except requests.exceptions.ConnectTimeout as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "post_cert", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + # done with exceptions + # if we have a valid status code we should be ok + if(r.status_code): + done = True + print "HTTP status code: {0}".format(r.status_code) else: print "Could not find cert files inside post_cert method!" @@ -281,7 +336,23 @@ def get_project_id(self): --> /projects/1a5/certificate ''' url = "{0}/projects".format(RANCHER_URL) - r = requests.get(url=url, auth=self.auth()) + done = False + while not done: + try: + r = requests.get(url=url, auth=self.auth(), timeout=CONNECT_TIMEOUT) + except requests.exceptions.ConnectionError as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_project_id", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + except requests.exceptions.ConnectTimeout as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_project_id", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + # done with exceptions + # if we have a valid status code we should be ok + if(r.status_code): + done = True + j = r.json() return j['data'][0]['id'] @@ -388,7 +459,27 @@ def check_hostnames_and_ports(self): # a 301 redirect. Also, if we get a 503 service unavailable status code there is no lets-encrypt nginx # container working, and we should continue to wait and NOT requests Let's Encrypt certificates yet. url = "http://{0}:{1}/.well-known/acme-challenge/".format(host, HOST_CHECK_PORT) - r = requests.get(url, allow_redirects=False) + + # at this point the port is open, but it may not respond with a valid http response + # so we need to check that it returns a valid http response and the connection can be opened + + cannot_connect = True + while cannot_connect: + try: + r = requests.get(url, allow_redirects=False, timeout=CONNECT_TIMEOUT) + except requests.exceptions.ConnectionError as e: + print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) + print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + except requests.exceptions.ConnectTimeout as e: + print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) + print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + # can connect, so check we got a valid response code + if(r.status_code): + # we can connect now! + cannot_connect = False + if(r.status_code != 503 and r.status_code != 301): print "\t\tOK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) done = True From ed5e0fcfaeb44f48f9fd701360b34a392ca8148d Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 09:59:38 -0700 Subject: [PATCH 02/11] Need to skip rest of loop, and continue waiting to connect. --- rancher.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rancher.py b/rancher.py index 27e8c4c..c5fa3fd 100755 --- a/rancher.py +++ b/rancher.py @@ -59,10 +59,12 @@ def get_certificate(self): print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_certificate", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue except requests.exceptions.ConnectTimeout as e: print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_certificate", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue # done with exceptions # if we have a valid status code we should be ok if(r.status_code): @@ -135,10 +137,12 @@ def delete_cert(self, server): print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "delete_cert", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue except requests.exceptions.ConnectTimeout as e: print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "delete_cert", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue # done with exceptions # if we have a valid status code we should be ok if(r.status_code): @@ -317,10 +321,12 @@ def post_cert(self, server): print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "post_cert", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue except requests.exceptions.ConnectTimeout as e: print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "post_cert", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue # done with exceptions # if we have a valid status code we should be ok if(r.status_code): @@ -344,10 +350,12 @@ def get_project_id(self): print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_project_id", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue except requests.exceptions.ConnectTimeout as e: print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_project_id", str(e)) print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue # done with exceptions # if we have a valid status code we should be ok if(r.status_code): @@ -471,10 +479,12 @@ def check_hostnames_and_ports(self): print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue except requests.exceptions.ConnectTimeout as e: print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) time.sleep(CONNECT_WAIT) + continue # can connect, so check we got a valid response code if(r.status_code): # we can connect now! From 92821d214e0338c9abd5e98cd6ff925c5c11928a Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 10:24:07 -0700 Subject: [PATCH 03/11] Fixes #19 --- rancher.py | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/rancher.py b/rancher.py index c5fa3fd..96df7c6 100755 --- a/rancher.py +++ b/rancher.py @@ -33,7 +33,7 @@ HOST_CHECK_PORT = int(os.environ['HOST_CHECK_PORT']) except KeyError as e: - print "Could not find an Environment variable set." + print "ERROR: Could not find an Environment variable set." print e @@ -113,7 +113,7 @@ def rancher_certificate_expired(self, server): expires_at = certificate['expiresAt'] timestamp = datetime.strptime(expires_at, '%a %b %d %H:%M:%S %Z %Y') expiry = int(timestamp.strftime("%s")) - print "Found cert: {0}, Expiry: {1}".format(cn, expiry) + print "INFO: Found cert: {0}, Expiry: {1}".format(cn, expiry) now = int(time.time()) if(self.expiring(expiry)): return True @@ -148,8 +148,8 @@ def delete_cert(self, server): if(r.status_code): done = True - print "Delete cert status code: {0}".format(r.status_code) - print "Sleeping for two minutes because rancher sucks and takes FOREVER to purge a deleted certificate" + print "INFO: Delete cert status code: {0}".format(r.status_code) + print "INFO: Sleeping for two minutes because rancher sucks and takes FOREVER to purge a deleted certificate" time.sleep(120) def get_certificate_id(self, server): @@ -176,7 +176,7 @@ def expiring(self, cert_time): return False def renew_certificate(self, server): - print "Renewing certificate for {0}".format(server) + print "INFO: Renewing certificate for {0}".format(server) self.create_cert(server) def check_cert_files_exist(self, server): @@ -193,7 +193,7 @@ def check_cert_files_exist(self, server): def loop(self): while True: self.cert_manager() - print "Sleeping: {0} seconds...".format(LOOP_TIME) + print "INFO: Sleeping: {0} seconds...".format(LOOP_TIME) time.sleep(LOOP_TIME) def cert_manager(self): @@ -225,15 +225,15 @@ def cert_manager(self): # cert in rancher server_cert_issuer = issuers[server]['issuer'] if("Fake" in server_cert_issuer and not STAGING): - # upgarde staging cert to production - print "Upgrading staging cert to production for {0}".format(server) + # upgrade staging cert to production + print "INFO: Upgrading staging cert to production for {0}".format(server) self.create_cert(server) self.post_cert(server) elif("X3" not in server_cert_issuer and not STAGING): # we have a self-signed certificate we should replace with a prod certificate. # this should only happen once on initial rancher install. - print "Replacing self-signed certificate: {0}, {1} with production LE cert".format(server, server_cert_issuer) + print "INFO: Replacing self-signed certificate: {0}, {1} with production LE cert".format(server, server_cert_issuer) self.create_cert(server) self.post_cert(server) @@ -253,7 +253,7 @@ def cert_manager(self): self.post_cert(server) def create_cert(self, server): - print "need to create cert for {0}".format(server) + print "INFO: Need to create cert for {0}".format(server) # TODO this is incredibly hacky. Certbot is python code so there should be a way to do this without shelling out to the cli certbot tool. (certbot docs suck btw) # https://www.metachris.com/2015/12/comparison-of-10-acme-lets-encrypt-clients/#client-simp_le maybe? if(STAGING): @@ -266,9 +266,9 @@ def create_cert(self, server): # read cert in from file if proc.returncode == 0: # made cert hopefully *crosses fingers* - print "certbot seems to have run with exit code 0" + print "INFO: certbot seems to have run with exit code 0" else: - print "an error occured during cert creation." + print "INFO: certbot -- an error occured during cert creation. Non-zero Status code ({})".format(proc.returncode) # print stdout from subprocess print com @@ -332,9 +332,9 @@ def post_cert(self, server): if(r.status_code): done = True - print "HTTP status code: {0}".format(r.status_code) + print "INFO: HTTP status code: {0}".format(r.status_code) else: - print "Could not find cert files inside post_cert method!" + print "INFO: Could not find cert files inside post_cert method!" def get_project_id(self): ''' @@ -376,7 +376,7 @@ def read_cert(self, server): cert = openfile.read().rstrip('\n') return cert else: - print "Could not find file: {0}".format(cert_file) + print "ERROR: Could not find file: {0}".format(cert_file) return None def read_privkey(self, server): @@ -391,7 +391,7 @@ def read_privkey(self, server): privkey = openfile.read().rstrip('\n') return privkey else: - print "Could not find file: {0}".format(privkey_file) + print "ERROR: Could not find file: {0}".format(privkey_file) return None def read_fullchain(self, server): @@ -405,7 +405,7 @@ def read_fullchain(self, server): fullchain = openfile.read().rstrip('\n') return fullchain else: - print "Could not find file: {0}".format(fullchain_file) + print "ERROR: Could not find file: {0}".format(fullchain_file) return None def read_chain(self, server): @@ -419,7 +419,7 @@ def read_chain(self, server): chain = openfile.read().rstrip('\n') return chain else: - print "Could not find file: {0}".format(chain_file) + print "ERROR: Could not find file: {0}".format(chain_file) return None def parse_servernames(self): @@ -433,7 +433,7 @@ def get_rancher_certificate_servers(self): cns = [] for certificate in returned_json: if(certificate['state'] == "active"): - print "CN: {0} is active".format(certificate['CN']) + print "INFO: CN: {0} is active".format(certificate['CN']) cns.append(certificate['CN']) return cns @@ -453,14 +453,14 @@ def check_hostnames_and_ports(self): done = False while not done: # something failed since we are not done - print "Sleeping during host lookups for {0} seconds".format(HOST_CHECK_LOOP_TIME) + print "INFO: Sleeping during host lookups for {0} seconds".format(HOST_CHECK_LOOP_TIME) time.sleep(HOST_CHECK_LOOP_TIME) # make sure all hostnames can be resolved and are listening on open ports for host in self.parse_servernames(): if(self.hostname_resolves(host)): - print "Hostname: {0} resolves".format(host) + print "INFO: Hostname: {0} resolves".format(host) if(self.port_open(host, HOST_CHECK_PORT)): - print "\tPort {0} open on {1}".format(HOST_CHECK_PORT, host) + print "\tINFO: Port {0} open on {1}".format(HOST_CHECK_PORT, host) # check if the /.well-known/acme-challenge/ directory isn't returning a 301 redirect # this is caused by the rancher load balancer not picking up the lets-encrypt service # and not directing traffic to it. Instead the redirection service gets the requests and returns @@ -491,18 +491,18 @@ def check_hostnames_and_ports(self): cannot_connect = False if(r.status_code != 503 and r.status_code != 301): - print "\t\tOK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) + print "\t\tINFO: OK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) done = True else: - print "\t\tReceived bad HTTP status code ({0}) from ({1})".format(r.status_code, host) + print "\t\tINFO: Received bad HTTP status code ({0}) from ({1})".format(r.status_code, host) done = False else: - print "Could not connect to port {0} on host {1}".format(HOST_CHECK_PORT, host) + print "INFO: Could not connect to port {0} on host {1}".format(HOST_CHECK_PORT, host) done = False else: - print "Could not lookup hostname for {0}".format(host) + print "INFO: Could not lookup DNS hostname for {0}".format(host) done = False - print "continuing on to letsencrypt cert provisioning since all hosts seem to be up!" + print "INFO: Continuing on to letsencrypt cert provisioning since all hosts seem to be up!" if __name__ == "__main__": service = RancherService() From d5bc0a1a12bd2733d855575e38751716b70c5a7e Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 10:29:16 -0700 Subject: [PATCH 04/11] Add notes about Cattle, rancher versions --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d2a5919..bb8fe04 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,8 @@ frontend http-frontend - DNS control of domain names (ability to create host.subdomain.domain.com records to point to Rancher IP) - Front-end load balancer exposing a privileged port (less than 1024) to the internet for Let's Encrypt verification - This Rancher service +- Rancher Cattle as Container Scheduler/Orchestrator +- Rancher v1.1.4 - v1.4.2 (versions tested with this service) ## How to use From b5f59d0ebafde864ec5e836439071e9bd7b0e2b2 Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 11:09:33 -0700 Subject: [PATCH 05/11] Add docs on environment variables, fix #20 --- rancher.py | 54 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/rancher.py b/rancher.py index 96df7c6..cbe7a96 100755 --- a/rancher.py +++ b/rancher.py @@ -11,30 +11,54 @@ import requests from OpenSSL import crypto from requests.auth import HTTPBasicAuth - -# how long (in seconds) we want to wait for HTTP request to complete before throwing an error -CONNECT_TIMEOUT = 0.5 - -# how long to back off connection time before trying request again (in seconds) -CONNECT_WAIT = 10 +import sys try: + # These variables should all get set as this service is a Rancher Agent + # Therefore, Rancher sets these for us. RANCHER_URL = os.environ['CATTLE_URL'] RANCHER_ACCESS_KEY = os.environ['CATTLE_ACCESS_KEY'] RANCHER_SECRET_KEY = os.environ['CATTLE_SECRET_KEY'] + + # list of domains we want certs for, comma-delimited DOMAINS = os.environ['DOMAINS'] # convert renew days -> seconds - RENEW_THRESHOLD = int(os.environ['RENEW_BEFORE_DAYS']) * (24 * 60 * 60) - LOOP_TIME = int(os.environ['LOOP_TIME']) - CERTBOT_WEBROOT = os.environ['CERTBOT_WEBROOT'] + + # we are now using os.getenv + # the first argument is the environment variable that is set inside the container + # if the environment variable is not set, then we use the default, the second arg + # this is only used for variables we can have defaults for, such as days + # we cannot use this for things like Rancher URL, Access keys, etc. + # therefore the below are *optional* to set + + # how long (in seconds) we want to wait for HTTP request to complete before throwing an error + CONNECT_TIMEOUT = int(os.getenv('CONNECT_TIMEOUT', 0.5)) + # how long to back off connection time before trying request again (in seconds) + CONNECT_WAIT = int(os.getenv('CONNECT_WAIT', 10)) + # how long, in days, before our cert expires should we renew it? + RENEW_THRESHOLD = int(os.getenv('RENEW_BEFORE_DAYS', 14)) * (24 * 60 * 60) + # sleep time before checking certs again + LOOP_TIME = int(os.getenv('LOOP_TIME', 300)) + # Shared webroot directory between Rancher Lets Encrypt service and Nginx container that + # serves the ACME requests + CERTBOT_WEBROOT = os.getenv('CERTBOT_WEBROOT', '/var/www') + # Where the lets encrypt files live, such as certificates, private keys, etc + LETSENCRYPT_ROOTDIR = os.getenv('LETSENCRYPT_ROOTDIR', '/etc/letsencrypt') + # email to register with letsencrypt with CERTBOT_EMAIL = os.environ['CERTBOT_EMAIL'] + # If this is set to True, we will create a "Dummy" LetsEncrypt certificate. Useful for testing. + # If you want production LE certs, Set to "False" Which will get a valid LE signed cert for you. STAGING = os.environ['STAGING'] == "True" + # how long to wait until we check our domains are up again when doing port/http checks. HOST_CHECK_LOOP_TIME = int(os.environ['HOST_CHECK_LOOP_TIME']) + # which port to use for LetsEncrypt verification. Defaults to 80. HOST_CHECK_PORT = int(os.environ['HOST_CHECK_PORT']) except KeyError as e: print "ERROR: Could not find an Environment variable set." print e + # exit the service since this failed. + sys.exit(1) class RancherService: @@ -183,7 +207,7 @@ def check_cert_files_exist(self, server): ''' check if certs files already exist on disk. If they are on disk and not in rancher, publish them in rancher. ''' - cert_dir = '/etc/letsencrypt/live/{0}/'.format(server) + cert_dir = '{0}/live/{1}/'.format(LETSENCRYPT_ROOTDIR, server) cert = '{0}/cert.pem'.format(cert_dir) privkey = '{0}/privkey.pem'.format(cert_dir) fullchain = '{0}/fullchain.pem'.format(cert_dir) @@ -274,7 +298,7 @@ def create_cert(self, server): def local_cert_expired(self, cert_string): ''' - if there is a certificate in /etc/letsencrypt, we should check that it is itself valid and not about to expire. + if there is a certificate in LETSENCRYPT_ROOTDIR, we should check that it is itself valid and not about to expire. ''' cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_string) timestamp = datetime.strptime(cert.get_notAfter(), "%Y%m%d%H%M%SZ") @@ -369,7 +393,7 @@ def read_cert(self, server): Read cert.pem file from letsencrypt directory and return the contents as a string ''' - cert_file = "/etc/letsencrypt/live/{0}/{1}".format(server, "cert.pem") + cert_file = "{0}/live/{1}/{2}".format(LETSENCRYPT_ROOTDIR, server, "cert.pem") if(os.path.isfile(cert_file)): # read files and post the correct info to populate rancher with open(cert_file, 'r') as openfile: @@ -384,7 +408,7 @@ def read_privkey(self, server): Read privkey.pem file from letsencrypt directory and return the contents as a string ''' - privkey_file = "/etc/letsencrypt/live/{0}/{1}".format(server, "privkey.pem") + privkey_file = "{0}/live/{1}/{2}".format(LETSENCRYPT_ROOTDIR, server, "privkey.pem") if(os.path.isfile(privkey_file)): # read files and post the correct info to populate rancher with open(privkey_file, 'r') as openfile: @@ -399,7 +423,7 @@ def read_fullchain(self, server): Read fullchain.pem file from letsencrypt directory. and return the contents as a string ''' - fullchain_file = "/etc/letsencrypt/live/{0}/{1}".format(server, "fullchain.pem") + fullchain_file = "{0}/live/{1}/{2}".format(LETSENCRYPT_ROOTDIR, server, "fullchain.pem") if(os.path.isfile(fullchain_file)): with open(fullchain_file, 'r') as openfile: fullchain = openfile.read().rstrip('\n') @@ -413,7 +437,7 @@ def read_chain(self, server): Read chain.pem file from letsencrypt directory. and return the contents as a string ''' - chain_file = "/etc/letsencrypt/live/{0}/{1}".format(server, "chain.pem") + chain_file = "{0}/live/{1}/{2}".format(LETSENCRYPT_ROOTDIR, server, "chain.pem") if(os.path.isfile(chain_file)): with open(chain_file, 'r') as openfile: chain = openfile.read().rstrip('\n') From 44cdfff16b1faeee76ceed5f123211e4ffa02e8b Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 11:18:34 -0700 Subject: [PATCH 06/11] Debug print --- rancher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rancher.py b/rancher.py index cbe7a96..89d8f2b 100755 --- a/rancher.py +++ b/rancher.py @@ -499,6 +499,7 @@ def check_hostnames_and_ports(self): while cannot_connect: try: r = requests.get(url, allow_redirects=False, timeout=CONNECT_TIMEOUT) + print "DEBUG: trying connect" except requests.exceptions.ConnectionError as e: print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) From a9953b79a36a671df3e95e3f21c485585e88abe8 Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 11:24:36 -0700 Subject: [PATCH 07/11] Remove loop for http check. --- rancher.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/rancher.py b/rancher.py index 89d8f2b..a44b85d 100755 --- a/rancher.py +++ b/rancher.py @@ -494,26 +494,7 @@ def check_hostnames_and_ports(self): # at this point the port is open, but it may not respond with a valid http response # so we need to check that it returns a valid http response and the connection can be opened - - cannot_connect = True - while cannot_connect: - try: - r = requests.get(url, allow_redirects=False, timeout=CONNECT_TIMEOUT) - print "DEBUG: trying connect" - except requests.exceptions.ConnectionError as e: - print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) - print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) - time.sleep(CONNECT_WAIT) - continue - except requests.exceptions.ConnectTimeout as e: - print "\t\tERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "check_hostnames_and_ports", str(e)) - print "\t\tERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) - time.sleep(CONNECT_WAIT) - continue - # can connect, so check we got a valid response code - if(r.status_code): - # we can connect now! - cannot_connect = False + r = requests.get(url, allow_redirects=False, timeout=CONNECT_TIMEOUT) if(r.status_code != 503 and r.status_code != 301): print "\t\tINFO: OK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) From 374714144632be33e07087053cc543dca7cff096 Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 11:34:19 -0700 Subject: [PATCH 08/11] longer timeout --- rancher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rancher.py b/rancher.py index a44b85d..db9faa3 100755 --- a/rancher.py +++ b/rancher.py @@ -494,7 +494,7 @@ def check_hostnames_and_ports(self): # at this point the port is open, but it may not respond with a valid http response # so we need to check that it returns a valid http response and the connection can be opened - r = requests.get(url, allow_redirects=False, timeout=CONNECT_TIMEOUT) + r = requests.get(url, allow_redirects=False, timeout=(CONNECT_TIMEOUT * 4)) if(r.status_code != 503 and r.status_code != 301): print "\t\tINFO: OK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) From 7af1081f1d639210e1da6edf65900a7baa4a9680 Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 11:42:38 -0700 Subject: [PATCH 09/11] no timeout --- rancher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rancher.py b/rancher.py index db9faa3..e21eeb5 100755 --- a/rancher.py +++ b/rancher.py @@ -494,7 +494,7 @@ def check_hostnames_and_ports(self): # at this point the port is open, but it may not respond with a valid http response # so we need to check that it returns a valid http response and the connection can be opened - r = requests.get(url, allow_redirects=False, timeout=(CONNECT_TIMEOUT * 4)) + r = requests.get(url, allow_redirects=False) if(r.status_code != 503 and r.status_code != 301): print "\t\tINFO: OK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) From 1837386ffa23cbc846c351643ba959af5bef289f Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Thu, 23 Mar 2017 12:03:05 -0700 Subject: [PATCH 10/11] Add this back in --- rancher.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/rancher.py b/rancher.py index e21eeb5..1c26941 100755 --- a/rancher.py +++ b/rancher.py @@ -32,7 +32,7 @@ # therefore the below are *optional* to set # how long (in seconds) we want to wait for HTTP request to complete before throwing an error - CONNECT_TIMEOUT = int(os.getenv('CONNECT_TIMEOUT', 0.5)) + CONNECT_TIMEOUT = int(os.getenv('CONNECT_TIMEOUT', 10)) # how long to back off connection time before trying request again (in seconds) CONNECT_WAIT = int(os.getenv('CONNECT_WAIT', 10)) # how long, in days, before our cert expires should we renew it? @@ -494,7 +494,25 @@ def check_hostnames_and_ports(self): # at this point the port is open, but it may not respond with a valid http response # so we need to check that it returns a valid http response and the connection can be opened - r = requests.get(url, allow_redirects=False) + + valid_http = False + while not valid_http: + try: + r = requests.get(url, allow_redirects=False) + except requests.exceptions.ConnectionError as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_certificate", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + continue + except requests.exceptions.ConnectTimeout as e: + print "ERROR: Cannot connect to URL: {0} for method {1}. Full error: {2}".format(url, "get_certificate", str(e)) + print "ERROR: Trying to reconnect in {0} seconds".format(CONNECT_WAIT) + time.sleep(CONNECT_WAIT) + continue + # done with exceptions + # if we have a valid status code we should be ok + if(r.status_code): + valid_http = True if(r.status_code != 503 and r.status_code != 301): print "\t\tINFO: OK, got HTTP status code ({0}) for ({1})".format(r.status_code, host) From 7d4c6d2c27b044c7db2b80287e0fda7a968dc73e Mon Sep 17 00:00:00 2001 From: Ben Reichert Date: Tue, 4 Apr 2017 13:03:25 -0700 Subject: [PATCH 11/11] missed a few environment variables that should have defaults. --- rancher.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rancher.py b/rancher.py index 1c26941..937a2f7 100755 --- a/rancher.py +++ b/rancher.py @@ -20,10 +20,13 @@ RANCHER_ACCESS_KEY = os.environ['CATTLE_ACCESS_KEY'] RANCHER_SECRET_KEY = os.environ['CATTLE_SECRET_KEY'] + ### These environment variables are required to be set! ### + # email to register with letsencrypt with + CERTBOT_EMAIL = os.environ['CERTBOT_EMAIL'] # list of domains we want certs for, comma-delimited DOMAINS = os.environ['DOMAINS'] - # convert renew days -> seconds + ### These environment variables will be set to defaults if they are not defined! ### # we are now using os.getenv # the first argument is the environment variable that is set inside the container # if the environment variable is not set, then we use the default, the second arg @@ -44,15 +47,13 @@ CERTBOT_WEBROOT = os.getenv('CERTBOT_WEBROOT', '/var/www') # Where the lets encrypt files live, such as certificates, private keys, etc LETSENCRYPT_ROOTDIR = os.getenv('LETSENCRYPT_ROOTDIR', '/etc/letsencrypt') - # email to register with letsencrypt with - CERTBOT_EMAIL = os.environ['CERTBOT_EMAIL'] # If this is set to True, we will create a "Dummy" LetsEncrypt certificate. Useful for testing. # If you want production LE certs, Set to "False" Which will get a valid LE signed cert for you. - STAGING = os.environ['STAGING'] == "True" + STAGING = os.getenv('STAGING', "True") == "True" # how long to wait until we check our domains are up again when doing port/http checks. - HOST_CHECK_LOOP_TIME = int(os.environ['HOST_CHECK_LOOP_TIME']) + HOST_CHECK_LOOP_TIME = int(os.getenv('HOST_CHECK_LOOP_TIME', 10)) # which port to use for LetsEncrypt verification. Defaults to 80. - HOST_CHECK_PORT = int(os.environ['HOST_CHECK_PORT']) + HOST_CHECK_PORT = int(os.getenv('HOST_CHECK_PORT', 80)) except KeyError as e: print "ERROR: Could not find an Environment variable set."