Release v2.3.0 Commit bbb4789 Deploy Commit #601b827

JobService · Nov 27, 2017 · 52b2f4b · 52b2f4b
1 parent 74c5646
commit 52b2f4b
Show file tree

Hide file tree

Showing 14 changed files with 539 additions and 126 deletions.
diff --git a/README.md b/README.md
@@ -35,12 +35,15 @@ The Docker Compose file contains the following services:
 4. Job Tracking Worker  
     For simplicity the Job Tracking Worker is not shown on the diagram above.  The diagram shows messages passing directly between the workers, but in reality the messages are passed through the Job Tracking Worker, which acts as a proxy for them.  It routes them to their intended destination but it also updates the Job Service Database with the progress.  This means that the Job Service is able to provide accurate progress reports when they are requested.
 
-5. GlobFilter Worker  
+5. Job Service Scheduled Executor  
+    This is a polling service that identifies jobs in the system that depend on other jobs which are now complete.  It is an ExecutorService which schedules a task that runs repeatedly, identifying jobs which are ready to run.  For each job identified, a message is published on RabbitMQ in order to start the job.  For simplicity, this service is not shown on the diagram above.
+
+6. GlobFilter Worker  
     This is a simple worker developed just for this demonstration.  It is a Batch Worker which takes in a glob-pattern as the Batch Definition.  Glob-patterns are generally fairly simple.  For example, `*.txt` means "all text files in the input folder".  Even more complex patterns like `**/t*.txt`, which means "all text files which start with the letter 't' and are in the input folder or in any subfolders of the input folder", are fairly easy to understand.  The worker produces a separate task for each file which matches the glob-pattern.
 
     By default the input folder is `./input-files`, which is a directory in this repository which contains a few sample text files in different languages.  A different input folder can be used by setting the `JOB_SERVICE_DEMO_INPUT_DIR` environment variable.
 
-6. Language Detection Worker  
+7. Language Detection Worker  
     This worker reads text files and determines what language or languages they are written in.  Typically it would return the result to another worker but for this demonstration it is configured to output the results to a folder.
 
     By default the output folder used is `./output-files`, but a different folder can be used by setting the `JOB_SERVICE_DEMO_OUTPUT_DIR` environment variable.

diff --git a/docker-compose.debug.yml b/docker-compose.debug.yml
@@ -37,3 +37,8 @@ services:
     ports:
       - "9456:5005"
       - "9457:8081"
+
+  # Increase log level of Job Service Scheduled Executor
+  jobservicescheduledexecutor:
+    environment:
+      CAF_LOG_LEVEL: DEBUG
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -13,15 +13,15 @@ services:
       CAF_STATUS_CHECK_TIME: 5
       CAF_TRACKING_PIPE: jobtracking-in
       CAF_WEBSERVICE_URL: http://jobservice:8080/job-service/v1
-    image: jobservice/job-service:2.2
+    image: jobservice/job-service:2.3.0
     ports:
       - "${JOB_SERVICE_PORT:-9411}:8080"
 
   jobservicedb:
     environment:
       POSTGRES_USER: postgres
       POSTGRES_PASSWORD: root
-    image: jobservice/job-service-postgres:2.2
+    image: jobservice/job-service-postgres:2.3.0
     volumes:
       - job-service-db:/var/lib/postgresql/data
 
@@ -52,12 +52,14 @@ services:
     env_file:
       - ./rabbitmq.env
     environment:
+      CAF_STATUS_CHECK_TIME: 5
+      CAF_WEBSERVICE_URL: http://jobservice:8080/job-service/v1
       CAF_WORKER_INPUT_QUEUE: jobtracking-in
       CAF_WORKER_OUTPUT_QUEUE: jobtracking-out
       JOB_DATABASE_URL: jdbc:postgresql://jobservicedb:5432/jobservice
       JOB_DATABASE_USERNAME: postgres
       JOB_DATABASE_PASSWORD: root
-    image: jobservice/worker-jobtracking:2.2
+    image: jobservice/worker-jobtracking:2.3.0
 
   worker-langdetect:
     depends_on:
@@ -73,6 +75,23 @@ services:
       - worker-datastore:/mnt/caf-datastore-root
       - ${JOB_SERVICE_DEMO_OUTPUT_DIR:-./output-files}:/mnt/caf-worker-output-dir
 
+  jobservicescheduledexecutor:
+    depends_on:
+      - rabbitmq
+      - jobservicedb
+    env_file:
+      - ./rabbitmq.env
+    environment:
+      CAF_WORKER_INPUT_QUEUE: jobservicescheduler-in
+      CAF_DATABASE_URL: jdbc:postgresql://jobservicedb:5432/jobservice
+      CAF_DATABASE_USERNAME: postgres
+      CAF_DATABASE_PASSWORD: root
+      CAF_STATUS_CHECK_TIME: 5
+      CAF_TRACKING_PIPE: jobtracking-in
+      CAF_WEBSERVICE_URL: http://jobservice:8080/job-service/v1
+      CAF_SCHEDULED_EXECUTOR_PERIOD: 10
+    image: jobservice/job-service-scheduled-executor:2.3.0
+
 volumes:
   job-service-db:
   rabbitmq:

diff --git a/production-marathon-prerequisites/README.md b/production-marathon-prerequisites/README.md
@@ -0,0 +1,36 @@
+# Production Marathon Prerequisites
+
+The Production Marathon Prerequisites deployment supports the deployment of the Job Service Database and RabbitMQ on Mesos/Marathon. This folder contains the marathon environment and template files that are required to deploy the Job Service Database and RabbitMQ.
+
+## Service Configuration
+
+### Marathon Template
+The `marathon.json.b` template file describes the marathon deployment information required for starting the Job Service Database and RabbitMQ. The template file uses property substitution to get values for configurable properties **required** for service deployment. These properties are configured in the marathon environment file `marathon.env`.
+
+### Marathon Environment
+The `marathon.env` file supports configurable property settings necessary for service deployment. These include:
+
+- `JOB_SERVICE_DB_PORT`: This configures the external port number on the host machine that will be forwarded to the Job Service Database container's internal port 5432. This port is used to connect to the Job Service Database.
+
+- `JOB_SERVICE_DB_USER`: The username for the Postgres database.
+
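+- `JOB_SERVICE_DB_PASSWORD`: The password for the Postgres database.
+
+- `CAF_RABBITMQ_PORT`: This configures the service port that will be forwarded to the RabbitMQ container's internal port 5672.
+
+- `CAF_RABBITMQ_MANAGEMENT_PORT`: This configures the service port that will be forwarded to the RabbitMQ container's internal port 15672, which serves the RabbitMQ management interface.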
+
+### Additional Marathon Configuration
+The `marathon.json.b` deployment template file specifies default values for a number of additional settings which you may choose to modify directly for your custom deployment. These include:
+
+##### Application CPU, Memory and Instances
+
+- `cpus` : This setting can be used to configure the amount of CPU for the Job Service Database container. This does not have to be a whole number. **Default Value: 0.5**
+
+- `mem`: This configures the amount of RAM for the Job Service Database container. Note that this property does not configure the amount of RAM available to the container but is instead an upper limit. If the container's RAM exceeds this value it will cause docker to destroy and restart the container. **Default Value: 1024**
+
+- `instances`: This setting specifies the number of instances of the Job Service Database container to start on launch. **Default value: 1.**
+
+## Service Deployment
+In order to deploy the service application, issue the following command from the 'production-marathon-prerequisites' directory:
+
+	source ./marathon.env ; \
+	     cat marathon.json.b \
+	     | perl -pe 's/\$\{(\w+)\}/(exists $ENV{$1} && length $ENV{$1} > 0 ? $ENV{$1} : "NOT_SET_$1")/eg' \
+	     | curl -H "Content-Type: application/json" -d @- http://localhost:8080/v2/groups/
diff --git a/production-marathon-prerequisites/marathon.env b/production-marathon-prerequisites/marathon.env
@@ -0,0 +1,9 @@
+# The following properties MUST be configured before the Job Service Prerequisites can be deployed.
+
+export JOB_SERVICE_DB_PORT=NOT_SET_JOB_SERVICE_DB_PORT
+
+export JOB_SERVICE_DB_USER=NOT_SET_JOB_SERVICE_DB_USER
+export JOB_SERVICE_DB_PASSWORD=NOT_SET_JOB_SERVICE_DB_PASSWORD
+
+export CAF_RABBITMQ_PORT=NOT_SET_CAF_RABBITMQ_PORT
+export CAF_RABBITMQ_MANAGEMENT_PORT=NOT_SET_CAF_RABBITMQ_MANAGEMENT_PORT
diff --git a/production-marathon-prerequisites/marathon.json.b b/production-marathon-prerequisites/marathon.json.b
@@ -0,0 +1,60 @@
+{
+    "id": "jobservice-prerequisite",
+    "apps": [{
+            "id": "job-service-db",
+            "cpus": 0.5,
+            "mem": 1024,
+            "instances": 1,
+            "container": {
+                "docker": {
+                    "image": "jobservice/job-service-postgres:2.3.0",
+                    "network": "BRIDGE",
+                    "portMappings": [{
+                        "containerPort": 5432,
+                        "hostPort": 0,
+                        "protocol": "tcp",
+                        "servicePort": ${JOB_SERVICE_DB_PORT}
+                    }],
+                    "forcePullImage": true
+                },
+                "type": "DOCKER"
+            },
+            "env": {
+                "POSTGRES_USER": "${JOB_SERVICE_DB_USER}",
+                "POSTGRES_PASSWORD": "${JOB_SERVICE_DB_PASSWORD}"
+            }
+        },
+        {
+            "id": "rabbitmq",
+            "cpus": 0.4,
+            "mem": 1024,
+            "instances": 1,
+            "container": {
+                "docker": {
+                    "image": "rabbitmq:3-management",
+                    "network": "BRIDGE",
+                    "portMappings": [{
+                        "containerPort": 5672,
+                        "hostPort": 0,
+                        "protocol": "tcp",
+                        "servicePort": ${CAF_RABBITMQ_PORT}
+                    },
+                    {
+                        "containerPort": 15672,
+                        "hostPort": 0,
+                        "protocol": "tcp",
+                        "servicePort": ${CAF_RABBITMQ_MANAGEMENT_PORT}
+                    }],
+                    "forcePullImage": true
+                },
+                "type": "DOCKER",
+                "volumes": [
+                    {
+                        "containerPath": "/var/lib/rabbitmq",
+                        "hostPath": "rabbitmq",
+                        "mode": "RW"
+                    }
+                ]
+                }
+        }]
+}
diff --git a/production-marathon-testing/README.md b/production-marathon-testing/README.md
@@ -0,0 +1,115 @@
+# Production Marathon Testing
+
+The Production Marathon Testing deployment supports the deployment of the components required to smoke test a Job Service deployment on Mesos/Marathon. This folder contains the marathon environment and template files that are required to deploy the Glob Filter and Language Detection Workers.
+
+## Prerequisites
+
+### Docker login
+Before attempting to perform the Marathon deployments, a `docker login` command must be issued in order to make it possible to pull the required images from Docker Hub. The generic username and password for this are as follows:
+
+- **Username:** hpeemployee
+- **Password:** tomicrofocusandbeyond 
+
+## Service Configuration
+
+### Marathon Template
+The `marathon.json.b` template file describes the marathon deployment information required for starting the Glob Filter and Language Detection Workers. The template file uses property substitution to get values for configurable properties **required** for service deployment. These properties are configured in the marathon environment file `marathon.env`.
+
+### Marathon Environment
+The `marathon.env` file supports configurable property settings necessary for service deployment. These include:
+
+- `CAF_RABBITMQ_HOST`: The hostname for the RabbitMQ instance
+- `CAF_RABBITMQ_PORT`: The port for the RabbitMQ instance
+- `CAF_RABBITMQ_PASSWORD`: The password for the RabbitMQ instance
+- `CAF_RABBITMQ_USERNAME`: The username for the RabbitMQ instance
+
+- `CAF_WORKER_GLOBFILTER_INPUT_QUEUE`: The RabbitMQ queue on which the Glob Filter worker listens
+- `CAF_BATCH_WORKER_ERROR_QUEUE`: The RabbitMQ queue where failed Glob Filter worker messages go
+- `CAF_GLOB_WORKER_BINARY_DATA_INPUT_FOLDER`: The location of the mounted directory inside the container where the test files are located
+
+- `CAF_WORKER_LANGDETECT_INPUT_QUEUE`: The RabbitMQ queue on which the Language Detection worker listens
+- `CAF_WORKER_LANGDETECT_OUTPUT_QUEUE`: The RabbitMQ queue on which the Language Detection worker outputs messages
+- `CAF_LANG_DETECT_WORKER_OUTPUT_FOLDER`: The folder in which the Language Detection worker places result files
+
+- `JOB_SERVICE_DEMO_INPUT_DIR`: The directory where the test files are located on the host
+- `JOB_SERVICE_DEMO_OUTPUT_DIR`: The output directory for test results on the host
+
+- `CAF_WORKER_STORAGE_HOST_DATA_DIRECTORY`: The directory on the host that the Glob Filter and Language Detection workers can use as a datastore  
+
+### Additional Marathon Configuration
+The `marathon.json.b` deployment template file specifies default values for a number of additional settings which you may choose to modify directly for your custom deployment. These include:
+
+##### Application CPU, Memory and Instances
+
+- `cpus` : This setting can be used to configure the amount of CPU for the Glob Filter and Language Detection Worker containers. This does not have to be a whole number. **Default Value: 0.5**
+
+- `mem`: This configures the amount of RAM for the Glob Filter and Language Detection Worker containers. Note that this property does not configure the amount of RAM available to the containers but is instead an upper limit. If the container's RAM exceeds this value it will cause docker to destroy and restart the container. **Default Value: 1024**
+
+- `instances`: This setting specifies the number of instances of the Glob Filter and Language Detection Worker containers to start on launch. **Default value: 1.**
+
+## Service Deployment
+
+1. Deploy the Production Marathon Prerequisite services as described [here](../production-marathon-prerequisites/README.md).
+
+2. Deploy the Production Marathon services as described [here](../production-marathon/README.md).
+
+3. Deploy the testing Docker containers for Job Service by issuing the following command from the 'production-marathon-testing' directory:
+
+		source ./marathon.env ; \
+	     	cat marathon.json.b \
+	     	| perl -pe 's/\$\{(\w+)\}/(exists $ENV{$1} && length $ENV{$1} > 0 ? $ENV{$1} : "NOT_SET_$1")/eg' \
+	     	| curl -H "Content-Type: application/json" -d @- http://localhost:8080/v2/groups/
+
+4. Navigate to the Job Service UI  
+    The Job Service is a RESTful Web Service and is primarily intended for programmatic access, however it also ships with a Swagger-generated user-interface.
+
+    Using a browser, navigate to the `/job-service-ui` endpoint on the Job Service:  
+
+        http://<DOCKER-HOST>:<JOB-SERVICE-PORT>/job-service-ui
+
+    Adjust `<DOCKER-HOST>` and `<JOB-SERVICE-PORT>` to match your own environment.
+
+5. Try the `GET /jobStats/count` operation  
+    Click on this operation and then click on the 'Try it out!' button.
+
+    You should see the response is zero as you have not yet created any jobs.
+
+6. Create a Job  
+    Go to the `PUT /jobs/{jobId}` operation.
+
+    - Choose a Job Id, for example, `DemoJob`, and set it in the `jobId` parameter.
+    - Enter the following Job Definition into the `newJob` parameter:
+
+        <pre><code>{
+          "name": "Some job name",
+          "description": "The description of the job",
+          "task": {
+            "taskClassifier": "BatchWorker",
+            "taskApiVersion": 1,
+            "taskData": {
+              "batchType": "GlobPattern",
+              "batchDefinition": "*.txt",
+              "taskMessageType": "DocumentMessage",
+              "taskMessageParams": {
+                "field:binaryFile": "CONTENT",
+                "field:fileName": "FILE_NAME",
+                "cd:outputSubfolder": "subDir"
+              },
+              "targetPipe": "languageidentification-in"
+            },
+            "taskPipe": "globfilter-in",
+            "targetPipe": "languageidentification-out"
+          }
+        }</code></pre>
+
+7. Check on the Job's progress  
+    Go to the `GET /jobs/{jobId}` operation.
+
+    - Enter the Job Id that you chose when creating the job.
+    - Click on the 'Try it out!' button.
+
+    You should see a response returned from the Job Service.
+    - If the job is still in progress then the `status` field will be `Active` and the `percentageComplete` field will indicate the progress of the job.
+    - If the job has finished then the `status` field will be `Completed`.
+
+    Given that the Language Detection Worker is configured to output the results to files in a folder you should see that these files have been created in the output folder.  If you examine the output files you should see that they contain the details of what languages were detected in the corresponding input files.
diff --git a/production-marathon-testing/marathon.env b/production-marathon-testing/marathon.env
@@ -0,0 +1,19 @@
+# The following properties MUST be configured before the Glob Filter and Language Detection Workers can be deployed.
+# Every value is a NOT_SET_ placeholder named after its variable so that unconfigured settings are easy to spot.
+
+# RabbitMQ connection settings
+export CAF_RABBITMQ_HOST=NOT_SET_CAF_RABBITMQ_HOST
+export CAF_RABBITMQ_PORT=NOT_SET_CAF_RABBITMQ_PORT
+export CAF_RABBITMQ_PASSWORD=NOT_SET_CAF_RABBITMQ_PASSWORD
+export CAF_RABBITMQ_USERNAME=NOT_SET_CAF_RABBITMQ_USERNAME
+
+# Glob Filter Worker queues and input folder inside the container
+export CAF_WORKER_GLOBFILTER_INPUT_QUEUE=NOT_SET_CAF_WORKER_GLOBFILTER_INPUT_QUEUE
+export CAF_BATCH_WORKER_ERROR_QUEUE=NOT_SET_CAF_BATCH_WORKER_ERROR_QUEUE
+export CAF_GLOB_WORKER_BINARY_DATA_INPUT_FOLDER=NOT_SET_CAF_GLOB_WORKER_BINARY_DATA_INPUT_FOLDER
+
+# Language Detection Worker queues and result folder
+export CAF_WORKER_LANGDETECT_INPUT_QUEUE=NOT_SET_CAF_WORKER_LANGDETECT_INPUT_QUEUE
+export CAF_WORKER_LANGDETECT_OUTPUT_QUEUE=NOT_SET_CAF_WORKER_LANGDETECT_OUTPUT_QUEUE
+export CAF_LANG_DETECT_WORKER_OUTPUT_FOLDER=NOT_SET_CAF_LANG_DETECT_WORKER_OUTPUT_FOLDER
+
+# Host directories holding the test input files and the test results
+export JOB_SERVICE_DEMO_INPUT_DIR=NOT_SET_JOB_SERVICE_DEMO_INPUT_DIR
+export JOB_SERVICE_DEMO_OUTPUT_DIR=NOT_SET_JOB_SERVICE_DEMO_OUTPUT_DIR
+
+# Host directory used as the datastore by both workers
+export CAF_WORKER_STORAGE_HOST_DATA_DIRECTORY=NOT_SET_CAF_WORKER_STORAGE_HOST_DATA_DIRECTORY