summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ARCH413
-rw-r--r--README96
-rwxr-xr-xcheck5
-rw-r--r--etc/lighttpd/lorry-controller-webapp-httpd.conf33
-rwxr-xr-xlorry-controller-minion302
-rwxr-xr-xlorry-controller-webapp223
-rw-r--r--lorry-controller.morph14
-rw-r--r--lorrycontroller/__init__.py44
-rw-r--r--lorrycontroller/gitano.py130
-rw-r--r--lorrycontroller/givemejob.py130
-rw-r--r--lorrycontroller/jobupdate.py77
-rw-r--r--lorrycontroller/listjobs.py63
-rw-r--r--lorrycontroller/listqueue.py33
-rw-r--r--lorrycontroller/listrunningjobs.py34
-rw-r--r--lorrycontroller/lstroves.py217
-rw-r--r--lorrycontroller/maxjobs.py55
-rw-r--r--lorrycontroller/movetopbottom.py58
-rw-r--r--lorrycontroller/pretendtime.py42
-rw-r--r--lorrycontroller/proxy.py51
-rw-r--r--lorrycontroller/readconf.py347
-rw-r--r--lorrycontroller/removejob.py44
-rw-r--r--lorrycontroller/route.py53
-rw-r--r--lorrycontroller/showjob.py83
-rw-r--r--lorrycontroller/showlorry.py86
-rw-r--r--lorrycontroller/startstopqueue.py55
-rw-r--r--lorrycontroller/statedb.py577
-rw-r--r--lorrycontroller/static.py36
-rw-r--r--lorrycontroller/status.py169
-rw-r--r--lorrycontroller/stopjob.py41
-rw-r--r--setup.py21
-rw-r--r--static/style.css18
-rw-r--r--templates/job.tpl20
-rw-r--r--templates/list-jobs.tpl32
-rw-r--r--templates/lorry.tpl44
-rw-r--r--templates/status.tpl115
-rwxr-xr-xtest-wait-for-port40
-rw-r--r--units/lighttpd-lorry-controller-webapp.service12
-rw-r--r--units/lorry-controller-ls-troves.service8
-rw-r--r--units/lorry-controller-ls-troves.timer6
-rw-r--r--units/lorry-controller-minion@.service9
-rw-r--r--units/lorry-controller-readconf.service8
-rw-r--r--units/lorry-controller-readconf.timer6
-rw-r--r--units/lorry-controller-status.service9
-rw-r--r--units/lorry-controller-status.timer6
-rw-r--r--yarns.webapp/010-introduction.yarn77
-rw-r--r--yarns.webapp/020-status.yarn27
-rw-r--r--yarns.webapp/030-queue-management.yarn106
-rw-r--r--yarns.webapp/040-running-jobs.yarn260
-rw-r--r--yarns.webapp/050-troves.yarn76
-rw-r--r--yarns.webapp/060-validation.yarn190
-rw-r--r--yarns.webapp/900-implementations.yarn484
-rw-r--r--yarns.webapp/yarn.sh56
52 files changed, 5137 insertions, 4 deletions
diff --git a/ARCH b/ARCH
new file mode 100644
index 0000000..c1cb979
--- /dev/null
+++ b/ARCH
@@ -0,0 +1,413 @@
+% Architecture of daemonised Lorry Controller
+% Codethink Ltd
+
+Introduction
+============
+
+This is an architecture document for Lorry Controller. It is aimed at
+those who develop the software.
+
+Lorry is a tool in Baserock for mirroring code from whatever format
+upstream provides it into git repositories, converting them to git as
+needed. Lorry Controller is a service, running on a Trove, which runs
+Lorry against all configured upstreams, including other Troves.
+
+Lorry Controller reads a configuration from a git repository. That
+configuration includes specifications of which upstreams to
+mirror/convert. This includes what upstream Troves to mirror. Lorry
+Controller instructs Lorry to push to a Trove's git repositories.
+
+Lorry specifications, and upstream Trove specifications, may include
+scheduling information, which the Lorry Controller uses to decide when
+to execute which specification.
+
+Requirements
+============
+
+Some concepts/terminology:
+
+* CONFGIT is the git repository the Lorry Controller instance uses for
+ its configuration.
+* Lorry specification: which upstream version control repository or
+ tarball to mirror.
+* Trove specification: which upstream Trove to mirror. This gets
+ broken into generated Lorry specifications, one per git repository
+ on the upstream Trove. There can be many Trove specifications to
+ mirror many Troves.
+* job: An instance of executing a Lorry specification. Each job has an
+ identifier and associated data (such as the output provided by the
+ running job, and whether it succeeded).
+* run queue: all the Lorry specifications (from CONFGIT or generated
+ from the Trove specifications) a Lorry Controller knows about; this
+ is the set of things that get scheduled. The queue has a linear
+ order (first job in the queue is the next job to execute).
+* admin: a person who can control or reconfigure a Lorry Controller
+ instance.
+
+Original set of requirements, which have been broken down and detailed
+below:
+
+* Lorry Controller should be capable of being reconfigured at runtime
+ to allow new tasks to be added and old tasks to be removed.
+ (RC/ADD, RC/RM, RC/START)
+* Lorry Controller should not allow all tasks to become stuck if one
+ task is taking a long time. (RR/MULTI)
+* Lorry Controller should not allow stuck tasks to remain stuck
+ forever. (Configurable timeout? monitoring of disk usage or CPU to
+ see if work is being done?) (RR/TIMEOUT)
+* Lorry Controller should be able to be controlled at runtime to allow:
+ - Querying of the current task set (RQ/SPECS, RQ/SPEC)
+ - Querying of currently running tasks (RQ/RUNNING)
+ - Promotion or demotion of a task in the queue (RT/TOP, RT/BOT)
+ - Supporting of the health monitoring to allow appropriate alerts
+ to be sent out (MON/STATIC, MON/DU)
+
+The detailed requirements (prefixed by a unique identifier, which is
+used elsewhere to refer to the exact requirement):
+
+* (FW) Lorry Controller can access upstream Troves from behind firewalls.
+ * (FW/H) Lorry Controller can access the upstream Trove using HTTP or
+ HTTPS only, without using ssh, in order to get a list of
+ repositories to mirror. (Lorry itself also needs to be able to
+ access the upstream Trove using HTTP or HTTPS only, bypassing
+ ssh, but that's a Lorry problem and outside the scope of Lorry
+ Controller, so it'll need to be dealt with separately.)
+ * (FW/C) Lorry Controller does not verify SSL/TLS certificates
+ when accessing the upstream Trove.
+* (RC) Lorry Controller can be reconfigured at runtime.
+ * (RC/ADD) A new Lorry specification can be added to CONFGIT, and
+ a running Lorry Controller will add them to its run queue as
+ soon as it is notified of the change.
+ * (RC/RM) A Lorry specification can be removed from CONFGIT, and a
+ running Lorry Controller will remove it from its run queue as
+ soon as it is notified of the change.
+ * (RC/START) A Lorry Controller reads CONFGIT when it starts,
+ updating its run queue if anything has changed.
+* (RT) Lorry Controller can be controlled at runtime.
+ * (RT/KILL) An admin can get their Lorry Controller to stop a running job.
+ * (RT/TOP) An admin can get their Lorry Controller to move a Lorry spec to
+ the beginning of the run queue.
+ * (RT/BOT) An admin can get their Lorry Controller to move a Lorry
+ spec to the end of the run queue.
+ * (RT/QSTOP) An admin can stop their Lorry Controller from scheduling any new
+ jobs.
+ * (RT/QSTART) An admin can get their Lorry Controller to start
+ scheduling jobs again.
+* (RQ) Lorry Controller can be queried at runtime.
+ * (RQ/RUNNING) An admin can list all currently running jobs.
+ * (RQ/ALLJOBS) An admin can list all finished jobs that the Lorry
+ Controller still remembers.
+ * (RQ/SPECS) An admin can list all existing Lorry specifications
+ in the run queue.
+ * (RQ/SPEC) An admin can query existing Lorry specifications in
+ the run queue for any information the Lorry Controller holds for
+ them, such as the last time they successfully finished running.
+* (RR) Lorry Controller is reasonably robust.
+ * (RR/CONF) Lorry Controller ignores any broken Lorry or Trove
+ specifications in CONFGIT, and runs without them.
+ * (RR/TIMEOUT) Lorry Controller stops a job that runs for too
+ long.
+ * (RR/MULTI) Lorry Controller can run multiple jobs at the same
+ time, and lets the maximal number of such jobs be configured by
+ the admin.
+ * (RR/DU) Lorry Controller (and the way it runs Lorry) is
+ designed to be frugal about disk space usage.
+ * (RR/CERT) Lorry Controller tells Lorry to not worry about
+ unverifiable SSL/TLS certificates and to continue even if the
+ certificate can't be verified or the verification fails.
+* (RS) Lorry Controller is reasonably scalable.
+ * (RS/SPECS) Lorry Controller works for the number of Lorry
+ specifications we have on git.baserock.org (a number that will
+ increase, and is currently about 500).
+ * (RS/GITS) Lorry Controller works for mirroring git.baserock.org
+ (about 500 git repositories).
+ * (RS/HW) Lorry Controller may assume that CPU, disk, and
+ bandwidth are sufficient, though they should not be needlessly wasted.
+* (MON) Lorry Controller can be monitored from the outside.
+ * (MON/STATIC) Lorry Controller updates at least once a minute a
+ static HTML file, which shows its current status with sufficient
+ detail that an admin knows if things get stuck or break.
+ * (MON/DU) Lorry Controller measures, at least, the disk usage of
+ each job and Lorry specification.
+* (SEC) Lorry Controller is reasonably secure.
+ * (SEC/API) Access to the Lorry Controller run-time query and
+ controller interfaces is managed with iptables (for now).
+ * (SEC/CONF) Access to CONFGIT is managed by the git server that
+ hosts it. (Gitano on Trove.)
+
+Architecture design
+===================
+
+Constraints
+-----------
+
+Python is not good at multiple threads (partly due to the global
+interpreter lock), and mixing threads and executing subprocesses is
+quite tricky to get right in general. Thus, this design avoids using
+threads.
+
+Entities
+--------
+
+* An admin is a human being that communicates with the Lorry
+ Controller using an HTTP API. They might do it using a command line
+ client.
+* Lorry Controller runs Lorry appropriately, and consists of several
+ components described below.
+* The local Trove is where Lorry Controller tells its Lorry to push
+ the results.
+* Upstream Trove is a Trove that Lorry Controller mirrors to the local
+ Trove. There can be multiple upstream Troves.
+
+Components of Lorry Controller
+------------------------------
+
+* CONFGIT is a git repository for Lorry Controller configuration,
+ which the Lorry Controller can access and pull from. Pushing is not
+ required and should be prevented by Gitano. CONFGIT is hosted on the
+ local Trove.
+* STATEDB is persistent storage for the Lorry Controller's state: what
+ Lorry specs it knows about (provided by the admin, or generated from
+ a Trove spec by Lorry Controller itself), their ordering, jobs that
+ have been run or are being run, information about the jobs, etc.
+ The idea is that the Lorry Controller process can terminate (cleanly
+ or by crashing), and be restarted, and continue approximately where
+ it was. Also, a persistent storage is useful if there are multiple
+ processes involved due to how bottle.py and WSGI work. STATEDB is
+ implemented using sqlite3.
+* WEBAPP is the controlling part of Lorry Controller, which maintains
+ the run queue, and provides an HTTP API for monitoring and
+ controlling Lorry Controller. WEBAPP is implemented as a bottle.py
+ application.
+* MINION runs jobs (external processes) on behalf of WEBAPP. It
+ communicates with WEBAPP over HTTP, and requests a job to run,
+ starts it, and while it waits, sends partial output to the WEBAPP,
+ and asks the WEBAPP whether the job should be aborted or not. MINION
+ may eventually run on a different host than WEBAPP, for added
+ scalability.
+
+Components external to Lorry Controller
+---------------------------------------
+
+* A web server. This runs the Lorry Controller WEBAPP, using WSGI so
+ that multiple instances (processes) can run at once, and thus serve
+ many clients.
+* bottle.py is a Python microframework for web applications. We
+ already have it in Baserock, where we use it for morph-cache-server,
+ and it seems to be acceptable.
+* systemd is the operating system component that starts services and
+ processes.
+
+How the components work together
+--------------------------------
+
+* Each WEBAPP instance is started by the web server, when a request
+ comes in. The web server is started by a systemd unit.
+* Each MINION instance is started by a systemd unit. Each MINION
+ handles one job at a time, and doesn't block other MINIONs from
+ running other jobs. The admins decide how many MINIONs run at once,
+ depending on hardware resources and other considerations. (RR/MULTI)
+* An admin communicates with the WEBAPP only, by making HTTP requests.
+ Each request is either a query (GET) or a command (POST). Queries
+ report state as stored in STATEDB. Commands cause the WEBAPP
+ instance to do something and alter STATEDB accordingly.
+* When an admin makes changes to CONFGIT, and pushes them to the local
+ Trove, the Trove's git post-update hook makes an HTTP request to
+ WEBAPP to update STATEDB from CONFGIT. (RC/ADD, RC/RM)
+* Each MINION likewise communicates only with the WEBAPP using HTTP
+ requests. MINION requests a job to run (which triggers WEBAPP's job
+ scheduling), and then reports results to the WEBAPP (which causes
+ WEBAPP to store them in STATEDB), which tells MINION whether to
+ continue running the job or not (RT/KILL). There is no separate
+ scheduling process: all scheduling happens when there is a MINION
+ available.
+* At system start up, a systemd unit makes an HTTP request to WEBAPP
+ to make it refresh STATEDB from CONFGIT. (RC/START)
+* A timer unit for systemd makes an HTTP request to get WEBAPP to
+ refresh the static HTML status page. (MON/STATIC)
+
+In summary: systemd starts WEBAPP and MINIONs, and whenever a
+MINION can do work, it asks WEBAPP for something to do, and reports
+back results. Meanwhile, admin can query and control via HTTP requests
+to WEBAPP, and WEBAPP instances communicate via STATEDB.
+
+The WEBAPP
+----------
+
+The WEBAPP provides an HTTP API as described below.
+
+Requests for admins:
+
+* `GET /1.0/status` causes WEBAPP to return a JSON object that
+ describes the state of Lorry Controller. This information is meant
+ to be programmatically useable and may or may not be the same as in
+ the HTML page.
+* `POST /1.0/stop-queue` causes WEBAPP to stop scheduling new jobs to
+ run. Any currently running jobs are not affected. (RT/QSTOP)
+* `POST /1.0/start-queue` causes WEBAPP to start scheduling jobs
+ again. (RT/QSTART)
+
+* `GET /1.0/list-queue` causes WEBAPP to return a JSON list of ids of
+ all Lorry specifications in the run queue, in the order they are in
+ the run queue. (RQ/SPECS)
+* `GET /1.0/lorry/<lorryspecid>` causes WEBAPP to return a JSON map
+ (dict) with all the information about the specified Lorry
+ specification. (RQ/SPEC)
+* `POST /1.0/move-to-top/<lorryspecid>` where `lorryspecid` is the id
+ of a Lorry specification in the run queue, causes WEBAPP to move the
+ specified spec to the head of the run queue, and store this in
+ STATEDB. It doesn't affect currently running jobs. (RT/TOP)
+* `POST /1.0/move-to-bottom/<lorryspecid>` is like `/move-to-top`, but
+ moves the job to the end of the run queue. (RT/BOT)
+
+* `GET /1.0/list-running-jobs` causes WEBAPP to return a JSON list of
+ ids of all currently running jobs. (RQ/RUNNING)
+* `GET /1.0/job/<jobid>` causes WEBAPP to return a JSON map (dict)
+ with all the information about the specified job.
+* `POST /1.0/stop-job/<jobid>` where `jobid` is an id of a running job,
+ causes WEBAPP to record in STATEDB that the job is to be killed, and
+ waits for it to be killed. (Killing to be done when MINION gets
+ around to it.) This request returns as soon as the STATEDB change is
+ done.
+* `GET /1.0/list-all-jobs` causes WEBAPP to return a JSON list of ids
+ of all jobs, running or finished, that it knows about. (RQ/ALLJOBS)
+
+Requests for MINION:
+
+* `POST /1.0/give-me-job` is used by MINION to get a new job to run.
+ WEBAPP will either return a JSON object describing the job to run,
+ or return a status code indicating that there is nothing to do.
+ WEBAPP will respond immediately, even if there is nothing for MINION
+ to do, and MINION will then sleep for a while before it tries again.
+ WEBAPP updates STATEDB to record that the job is allocated to a
+ MINION.
+* `POST /1.0/job-update` is used by MINION to push updates about the
+ job it is running to WEBAPP. The body is a JSON object containing
+ additional information about the job, such as data from its
+ stdout/stderr, and current resource usage. There MUST be at least
+ one `job-update` call, which indicates the job has terminated.
+ WEBAPP responds with a status indicating whether the job should
+ continue to run or be terminated (RR/TIMEOUT). WEBAPP records the
+ job as terminated only after MINION tells it the job has been
+ terminated. MINION makes the `job-update` request frequently, even
+ if the job has produced no output, so that WEBAPP can update a
+ timestamp in STATEDB to indicate the job is still alive.
+
+Other requests:
+
+* `POST /1.0/read-configuration` causes WEBAPP to update its copy of
+ CONFGIT and update STATEDB based on the new configuration, if it has
+ changed. Returns OK/ERROR status. (RC/ADD, RC/RM, RC/START)
+* `GET /1.0/status-html` causes WEBAPP to return an HTML page that
+ describes the state of Lorry Controller. This also updates an
+ on-disk copy of the HTML page, which the web server is configured to
+ serve using a normal HTTP request. (MON/STATIC)
+
+The MINION
+----------
+
+* Do `POST /1.0/give-me-job` to WEBAPP.
+* If didn't get a job, sleep a while and try again.
+* If did get job, fork and exec that.
+* In a loop: wait for output, for a suitably short period of time,
+ from job (or its termination), with `select` or similar mechanism,
+ and send anything (if anything) you get to WEBAPP. If the WEBAPP
+ told us to kill the job, kill it, then send an update to that effect
+ to WEBAPP.
+* Go back to top to request new job.
+
+STATEDB
+-------
+
+The STATEDB has several tables. This section explains them.
+
+The `running_queue` table has a single column (`running`) and a single
+row, and is used to store a single boolean value that specifies
+whether WEBAPP is giving out jobs to run from the run-queue. This
+value is controlled by `/1.0/start-queue` and `/1.0/stop-queue`
+requests.
+
+The `lorries` table implements the run-queue: all the Lorry specs that
+WEBAPP knows about. It has the following columns:
+
+* `path` is the path of the git repository on the local Trove, i.e.,
+ the git repository to which Lorry will push. This is a unique
+ identifier. It is used, for example, to determine if a Lorry spec
+ is obsolete after a CONFGIT update.
+* `text` has the text of the Lorry spec. This may be read from a file
+ or generated by Lorry Controller itself. This text will be given to
+ Lorry when a job is run.
+* `generated` is set to 0 or 1, depending on if the Lorry came from an
+ actual `.lorry` file or was generated by Lorry Controller.
+
+Implementation plan
+===================
+
+The following are meant to be a good sequence of steps to implement
+the design as described above.
+
+* Make a skeleton Lorry Controller and yarn test suite for it (2d)
+
+ Write a simplistic skeleton of a Lorry Controller WEBAPP and MINION,
+ and a few representative tests for them using yarn. The goal here is
+ not to have applications that do something real, or tests that test
+ something real, but to have a base upon which to start building, and
+ especially to make it easy to write tests (including new step
+ implementations) easily in the future.
+
+* Implement /1.0/status and /1.0/status-html in Lorry Controller
+ WEBAPP (1d)
+
+ This is the very basic core of the status reporting. Every
+ subsequent change will include updating the status reporting as
+ necessary.
+
+* Implement /1.0/status/disk-free-bytes in Lorry Controller WEBAPP (1d)
+
+* Implement /1.0/stop-queue and /1.0/start-queue in Lorry Controller
+ WEBAPP (1d)
+
+ This should just affect the bit in STATEDB that decides whether we
+ are currently running jobs from the run queue or not. This
+ implementation step does not need to actually implement running
+ jobs.
+
+* Implement /1.0/read-configuration and /1.0/list-queue in Lorry
+ Controller WEBAPP (3d) (S10450)
+
+ This requires implementing parsing of the configuration files in
+ CONFGIT, generation of Lorry specs from Trove specs,
+ adding/removing/updating specs in the run queue according to
+ changes. list-queue needs to be implemented so that the results of
+ read-configuration can be verified.
+
+* Implement running jobs in Lorry Controller WEBAPP (1d) (S10451)
+
+ Requests /1.0/give-me-job, /1.0/job-update,
+ /1.0/list-running-jobs, /1.0/stop-job/. These do not actually run
+ anything, of course, since that is a job for MINION, but they
+ change the state of the job in STATEDB, and that's what needs to
+ be implemented and tested.
+
+* Implement MINION in Lorry Controller (1d) (S10452)
+
+* Implement /1.0/move-to-top/ and /1.0/move-to-bottom/ in Lorry
+ Controller WEBAPP (1d) (S10453)
+
+* Implement /1.0/list-all-jobs, /1.0/job/ in Lorry Controller
+ WEBAPP (1d) (S10454)
+
+* Implement /1.0/lorry/ in Lorry Controller WEBAPP (1d) (S10455)
+
+* Add new Lorry Controller to Trove (2d) (S10456)
+
+ Replace old Lorry Controller with new one, and add any systemd
+ units needed to make it functional. Create at least a very basic
+ sanity check, using yarn, to verify that a deployed, running
+ system has a working Lorry Controller.
+
+* Review Lorry Controller situation and decide on further work
+
+ No implementation plan survives contact with reality, and thus
+ things will need to be reviewed at the end, in case something has
+ been forgotten or requirements have changed.
diff --git a/README b/README
index b6b2662..ea3059f 100644
--- a/README
+++ b/README
@@ -1,8 +1,96 @@
README for lorry-controller
===========================
-Notes
------
+Lorry Controller mirrors Troves and mirrors or converts upstream
+projects into git repositories on the local Trove. Lorry Controller
+reads a configuration file (see below) and runs the Lorry program
+against the intended targets at suitable intervals.
-The lorry-controller.conf here is an example starting config for any customer
-configuration. It is a single trove instance pointing at git.baserock.org.
+Lorry Controller configuration
+------------------------------
+
+Lorry Controller reads a configuration file of the following format.
+
+* The file uses JSON syntax.
+* The file is a list.
+* Each item in the list is a mapping specifying a Trove or a set of
+ `.lorry` files.
+* A Trove spec specifies another Trove to mirror completely.
+* A Lorry spec specifies a set of Lorry specifications (`.lorry` files)
+ for individual project repositories to convert or mirror to git on
+ the local Trove.
+
+Each spec (mapping) has a number of key/value pairs. The following are
+shared between Trove and Lorry specs:
+
+* `type` is the type of the spec; value MUST be either `trove` or
+ `lorries`.
+* `interval` specifies how often Lorry Controller should mirror the
+ repositories in the spec. See below for INTERVAL.
+* `protocol`: specifies how Lorry Controller (and Lorry) should talk
+ to remote Troves. Allowed values are `ssh`, `https`, `http`. This
+ field is mandatory.
+* `auth`: Specifies how to authenticate to the remote Trove over
+ https. This is an optional field. If present, it should be a
+ dictionary with the fields `username` and `password`.
+* Additionally, the following seem to be supported by an old version
+ of Lorry Controller, but are ignored by the new Lorry Controller:
+ `uuid`, `serial`, `create`, `destroy`, `stagger`, `tarball`.
+
+Trove specs have the following keys:
+
+* `trovehost` is the other Trove to mirror; a domain name or IP
+ address. It is mandatory.
+* `ls-interval` determines how often Lorry Controller should query the
+ other Trove for a list of repositories it may mirror. See below for
+ INTERVAL. `ls-interval` is mandatory.
+* `prefixmap` maps repository path prefixes from the other Trove to
+ the local Trove. It is mandatory in a Trove spec. If the remote
+ prefix is `foo`, and the local prefix is `bar`, then remote
+ repository `foo/baserock/yeehaa` gets mirrored to local repository
+ `bar/baserock/yeehaa`. If the remote Trove has a repository that
+ does not match a prefix, that repository gets ignored.
+* `ignore` is a list of git repositories from the other Trove that
+ should NOT be mirrored. Each list element is a path to the git
+ repository (not including leading slash). `ignore` is optional.
+
+An INTERVAL value (for `interval` or `ls-interval`) is a number and a
+unit to indicate a time interval. Allowed units are minutes (`m`),
+hours (`h`), and days (`d`), expressed as single-letter codes in upper
+or lower case.
+
+Lorry specs have the following keys:
+
+* `prefix` is a path prefix to be prepended to all repositories
+ created from the `.lorry` files from this spec. It is mandatory.
+* `globs` is a list of globs (as strings) for `.lorry` files to use.
+ The glob is matched in the directory containing the configuration
+ file in which this spec is. It is OK for the globs to not match
+ anything. A `globs` entry is mandatory, however.
+
+A fairly minimal example for mirroring `git.baserock.org` and using
+local `.lorry` files.
+
+ [
+ {
+ "type": "trove",
+ "trovehost": "git.baserock.org",
+ "ls-interval": "4d",
+ "interval": "2h",
+ "prefixmap": {
+ "baserock": "baserock",
+ "delta": "delta"
+ }
+ },
+ {
+ "type": "lorries",
+ "interval": "1h",
+ "prefix": "delta",
+ "globs": [
+ "open-source-lorries/*.lorry"
+ ]
+ }
+ ]
+
+The syntax of `.lorry` files is specified by the Lorry program; see
+its documentation for details.
diff --git a/check b/check
new file mode 100755
index 0000000..b449f2b
--- /dev/null
+++ b/check
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+set -eu
+
+yarn -s yarns.webapp/yarn.sh yarns.webapp/*.yarn --env PYTHONPATH="$PYTHONPATH" "$@"
diff --git a/etc/lighttpd/lorry-controller-webapp-httpd.conf b/etc/lighttpd/lorry-controller-webapp-httpd.conf
new file mode 100644
index 0000000..ec92783
--- /dev/null
+++ b/etc/lighttpd/lorry-controller-webapp-httpd.conf
@@ -0,0 +1,33 @@
+# lighttpd configuration for serving the Lorry Controller WEBAPP
+# over FastCGI.
+
+server.document-root = "/var/www/htdocs"
+
+# Port on which this lighttpd instance listens.
+server.port = 12765
+
+# Run the server as the lorry user and group.
+server.username = "lorry"
+server.groupname = "lorry"
+
+server.modules += ("mod_cgi", "mod_fastcgi")
+
+# MIME types for files served directly from the document root.
+mimetype.assign = (
+ ".html" => "text/html",
+ ".txt" => "text/plain",
+ ".jpg" => "image/jpeg",
+ ".png" => "image/png",
+ ".css" => "text/css"
+)
+
+# Settings that apply to requests arriving on port 12765.
+$SERVER["socket"] == ":12765" {
+ server.username = "lorry"
+ server.groupname = "lorry"
+ # Hand requests to lorry-controller-webapp via FastCGI. The "" key is
+ # presumably meant to match every URL -- confirm against the
+ # mod_fastcgi documentation.
+ fastcgi.server = (
+ "" =>
+ (
+ "python-fcgi" =>
+ (
+ # Socket used to talk to the FastCGI backend processes.
+ "socket" => "/run/lighttpd-lorry/lorry-controller-webapp.socket",
+ # Command that starts a backend process.
+ "bin-path" => "/usr/bin/lorry-controller-webapp --config=/etc/lorry-controller/webapp.conf",
+ "check-local" => "disable",
+ # Upper limit on the number of backend processes.
+ "max-procs" => 16,
+ )
+ )
+ )
+}
diff --git a/lorry-controller-minion b/lorry-controller-minion
new file mode 100755
index 0000000..fe2089f
--- /dev/null
+++ b/lorry-controller-minion
@@ -0,0 +1,302 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import fcntl
+import httplib
+import json
+import logging
+import os
+import platform
+import random
+import select
+import subprocess
+import tempfile
+import time
+import urllib
+
+import cliapp
+
+import lorrycontroller
+
+
+class WEBAPPError(Exception):
+
+ def __init__(self, status, reason, body):
+ Exception.__init__(
+ self, 'WEBAPP returned %s %s:\n%sbody' % (status, reason, body))
+
+
+class MINION(cliapp.Application):
+
+ def add_settings(self):
+ # Declare the settings MINION understands. Each call below
+ # registers one setting: its name, its help text and, where given,
+ # a metavar for the help output and a default value.
+
+ # Host on which WEBAPP is reached over HTTP.
+ self.settings.string(
+ ['webapp-host'],
+ 'address of WEBAPP',
+ default='localhost')
+
+ # TCP port on which WEBAPP is reached.
+ self.settings.integer(
+ ['webapp-port'],
+ 'port of WEBAPP',
+ default=80)
+
+ # Passed as the timeout of the HTTP connection to WEBAPP.
+ self.settings.integer(
+ ['webapp-timeout'],
+ 'how long to wait for an HTTP response from WEBAPP (in seconds)',
+ default=10)
+
+ # Pause between requests for a job when WEBAPP has none to give.
+ # The default 0 is replaced by a random value in process_args.
+ self.settings.integer(
+ ['sleep'],
+ 'do nothing for this long if there is no new job available '
+ '(0 for random 30..60 s)',
+ default=0)
+
+ # Program to execute for each job; it is given the name of a
+ # temporary file containing the Lorry spec as its only argument.
+ self.settings.string(
+ ['lorry-cmd'],
+ 'run CMD as argv0 instead of lorry '
+ '(args will be added as for lorry)',
+ metavar='CMD',
+ default='lorry')
+
+ # Directory under which disk usage of a finished job is measured.
+ self.settings.string(
+ ['lorry-working-area'],
+ 'where will Lorry put its files?',
+ metavar='DIR',
+ default='/home/lorry/working-area')
+
+ # Optional file name handed to lorrycontroller.setup_proxy.
+ self.settings.string(
+ ['proxy-config'],
+ 'read HTTP proxy config from FILENAME',
+ metavar='FILENAME')
+
+ def process_args(self, args):
+ logging.info('Starting MINION')
+
+ if self.settings['sleep'] == 0:
+ self.settings['sleep'] = random.randint(30, 60)
+
+ if self.settings['proxy-config']:
+ lorrycontroller.setup_proxy(self.settings['proxy-config'])
+
+ while True:
+ job_spec = self.get_job_spec()
+ if job_spec:
+ self.run_job(job_spec)
+ else:
+ logging.info(
+ 'Got no job from WEBAPP, sleeping for %s s',
+ self.settings['sleep'])
+ time.sleep(self.settings['sleep'])
+
+ def get_job_spec(self):
+ host = self.settings['webapp-host']
+ port = int(self.settings['webapp-port'])
+ timeout = self.settings['webapp-timeout']
+
+ logging.debug('Requesting job from WEBAPP (%s:%s)', host, port)
+
+ params = urllib.urlencode({
+ 'host': platform.node(),
+ 'pid': os.getpid(),
+ })
+
+ try:
+ body = self.webapp_request('POST', '/1.0/give-me-job', params)
+ except WEBAPPError as e:
+ logging.error(str(e))
+ return None
+
+ obj = json.loads(body)
+ if obj.get('job_id', None):
+ return obj
+ return None
+
+ def run_job(self, job_spec):
+ # Run one job to its end: start it, then alternate between polling
+ # it for output or termination and reporting to WEBAPP, until the
+ # job exits by itself or WEBAPP asks for it to be killed.
+ self.start_job(job_spec)
+ while True:
+ stdout, stderr, exit = self.poll_job()
+ kill_job = self.update_webapp_about_job(
+ job_spec, stdout, stderr, exit)
+ if exit is not None:
+ # The job has terminated; the update above already carried
+ # its exit code to WEBAPP.
+ break
+ if kill_job:
+ # WEBAPP wants the job stopped: kill it and send one final
+ # update with the exit code of the killed process.
+ exit = self.kill_job()
+ self.update_webapp_about_job(
+ job_spec, '', '', exit)
+ break
+
+ def start_job(self, job_spec):
+ # Start the subprocess for a job. Stores the temporary spec file
+ # name, the read end of the output pipe and the Popen object on
+ # self for poll_job and kill_job to use.
+ logging.info(
+ 'Running job %s: %s on %s',
+ job_spec['job_id'],
+ self.settings['lorry-cmd'],
+ job_spec['path'])
+
+ # Write the Lorry spec text to a temporary file; its name is the
+ # only argument given to the command.
+ fd, self.temp_lorry_filename = tempfile.mkstemp()
+ os.write(fd, job_spec['text'])
+ os.close(fd)
+
+ argv = [
+ self.settings['lorry-cmd'],
+ self.temp_lorry_filename,
+ ]
+
+ # The child writes to pipe[1]; we keep pipe[0] and make it
+ # non-blocking.
+ pipe = os.pipe()
+ self.stdout_fd = pipe[0]
+ self.set_nonblocking(self.stdout_fd)
+
+ devnull = open('/dev/null')
+
+ # stderr is merged into stdout, so everything the child prints
+ # arrives on the single pipe.
+ self.process = subprocess.Popen(
+ argv,
+ stdin=devnull,
+ stdout=pipe[1],
+ stderr=subprocess.STDOUT)
+
+ # Close our copies of the descriptors that were handed to the child.
+ os.close(pipe[1])
+ devnull.close()
+
+ def set_nonblocking(self, fd):
+ flags = fcntl.fcntl(fd, fcntl.F_GETFL, 0)
+ flags = flags | os.O_NONBLOCK
+ fcntl.fcntl(fd, fcntl.F_SETFL, flags)
+
+ def poll_job(self):
+ # Check on the running job once. Returns (stdout, stderr, exit):
+ # any output collected, an always-empty stderr string (the child's
+ # stderr is merged into stdout), and the exit code, which is None
+ # while the process is still running.
+ read_size = 1024
+
+ exit = self.process.poll()
+ if exit is None:
+ # Process is still running.
+ # Wait up to wait_for_output seconds for output, then read at
+ # most read_size bytes of it.
+ wait_for_output = 10.0
+ r, w, x = select.select([self.stdout_fd], [], [], wait_for_output)
+ stdout = stderr = ''
+ if r:
+ stdout = os.read(self.stdout_fd, read_size)
+ else:
+ # Finished.
+ if exit != 0:
+ logging.error('Subprocess failed')
+ # Collect whatever output is left, reading until os.read
+ # returns no data.
+ stdout_parts = []
+ while True:
+ data = os.read(self.stdout_fd, read_size)
+ if not data:
+ break
+ stdout_parts.append(data)
+ stdout = ''.join(stdout_parts)
+ stderr = ''
+ # The job is over: remove the temporary spec file and close
+ # the pipe.
+ os.remove(self.temp_lorry_filename)
+
+ os.close(self.stdout_fd)
+ self.stdout_fd = None
+
+ return stdout, stderr, exit
+
+ def kill_job(self):
+ self.process.kill()
+ return self.process.wait()
+
+ def update_webapp_about_job(self, job_spec, stdout, stderr, exit):
+ logging.debug(
+ 'Updating WEBAPP about running job %s', job_spec['job_id'])
+
+ if exit is None:
+ disk_usage = None
+ else:
+ disk_usage = self.get_lorry_disk_usage(job_spec)
+
+ params = urllib.urlencode({
+ 'job_id': job_spec['job_id'],
+ 'exit': 'no' if exit is None else exit,
+ 'stdout': stdout,
+ 'stderr': stderr,
+ 'disk_usage': disk_usage,
+ })
+
+ try:
+ body = self.webapp_request('POST', '/1.0/job-update', params)
+ except WEBAPPError as e:
+ logging.error(str(e))
+ return
+
+ obj = json.loads(body)
+ return obj['kill_job']
+
+ def webapp_request(self, method, path, body):
+ logging.debug(
+ 'Making HTTP request to WEBAPP: method=%r path=%r body=%r',
+ method, path, body)
+
+ host = self.settings['webapp-host']
+ port = int(self.settings['webapp-port'])
+ timeout = self.settings['webapp-timeout']
+ conn = httplib.HTTPConnection(host, port=port, timeout=timeout)
+
+ headers = {}
+ if body:
+ headers['Content-type'] = 'application/x-www-form-urlencoded'
+
+ conn.request(method, path, body=body, headers=headers)
+
+ response = conn.getresponse()
+ response_body = response.read()
+ conn.close()
+
+ if response.status != httplib.OK:
+ raise WEBAPPError(response.status, response.reason, response_body)
+
+ return response_body
+
+ def get_lorry_disk_usage(self, job_spec):
+ dirname = os.path.join(
+ self.settings['lorry-working-area'],
+ self.escape_lorry_area_basename(job_spec['path']))
+ return self.disk_usage_by_dir(dirname)
+
+ def escape_lorry_area_basename(self, basename):
+ # FIXME: This code should be kept in sync with the respective
+ # code in lorry, or, better, we would import the code from
+ # Lorry directly.
+
+ assert '\0' not in basename
+ # We escape slashes as underscores.
+ return '_'.join(basename.split('/'))
+
+ def disk_usage_by_dir(self, dirname):
+ exit, out, err = cliapp.runcmd_unchecked(['du', '-sk', dirname])
+ if exit:
+ logging.error('du -sk %s failed: %r', dirname, err)
+ return 0
+
+ lines = out.splitlines()
+ if not lines:
+ logging.warning('no output from du')
+ return 0
+
+ words = lines[-1].split()
+ if not words:
+ logging.warning('last line of du output is empty')
+ return 0
+
+ kibibyte = 1024
+ try:
+ return int(words[0]) * kibibyte
+ except ValueError:
+ logging.warning('error converting %r to string' % words[0])
+ return 0
+
+
+MINION().run()
diff --git a/lorry-controller-webapp b/lorry-controller-webapp
new file mode 100755
index 0000000..239e988
--- /dev/null
+++ b/lorry-controller-webapp
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import wsgiref.simple_server
+
+import bottle
+import cliapp
+from flup.server.fcgi import WSGIServer
+
+
+import lorrycontroller
+
+
+class WEBAPP(cliapp.Application):
+
+    def add_settings(self):
+        '''Declare the command line and configuration file settings.'''
+
+        self.settings.string(
+            ['statedb'],
+            'use FILE as the state database',
+            metavar='FILE')
+
+        self.settings.string(
+            ['configuration-directory'],
+            'use DIR as the configuration directory',
+            metavar='DIR',
+            default='.')
+
+        self.settings.string(
+            ['confgit-url'],
+            'get CONFGIT from URL',
+            metavar='URL')
+
+        # The value is a branch name, so the metavar must match the
+        # BRANCH used in the help text (it used to say URL).
+        self.settings.string(
+            ['confgit-branch'],
+            'get git branch BRANCH in CONFGIT',
+            metavar='BRANCH',
+            default='master')
+
+        self.settings.boolean(
+            ['debug-real-confgit'],
+            'if true, do real git operations on the configuration directory; '
+            'if false, do no git operations on it and just use what is there',
+            default=True)
+
+        self.settings.string(
+            ['status-html'],
+            'write a static HTML page to FILE to describe overall status',
+            metavar='FILE',
+            default='/dev/null')
+
+        self.settings.boolean(
+            ['wsgi'],
+            'run in wsgi mode (default is debug mode, for development)')
+
+        self.settings.integer(
+            ['debug-port'],
+            'use PORT in debugging mode '
+            '(i.e., when not running under WSGI); '
+            'note that setting this to non-zero disables --debug-port-file',
+            metavar='PORT',
+            default=0)
+
+        self.settings.string(
+            ['debug-port-file'],
+            'write listening port to FILE when in debug mode '
+            '(i.e., not running under WSGI)',
+            metavar='FILE',
+            default='webapp.port')
+
+        self.settings.string(
+            ['debug-host'],
+            'listen on HOST when in debug mode (i.e., not running under WSGI)',
+            metavar='HOST',
+            default='0.0.0.0')
+
+        self.settings.string_list(
+            ['debug-fake-trove'],
+            'fake access to remote Troves (to do gitano ls, etc) '
+            'using local files: get ls listing for TROVE from PATH, '
+            'where PATH names a file in JSON with the necessary info; '
+            'may be used multiple times',
+            metavar='TROVE=PATH')
+
+        self.settings.string(
+            ['templates'],
+            'find HTML page templates (*.tpl) in DIR',
+            metavar='DIR',
+            default='/usr/share/lorry-controller/templates')
+
+        self.settings.string(
+            ['static-files'],
+            'serve static files from DIR',
+            metavar='DIR',
+            default='/usr/share/lorry-controller/static')
+
+    def find_routes(self):
+        '''Generate every API route class found in lorrycontroller.
+
+        A route class is any class exported by the lorrycontroller
+        package that is a subclass of LorryControllerRoute, other than
+        LorryControllerRoute itself.
+
+        '''
+
+        # We look at every name in the lorrycontroller package rather
+        # than keeping a list of routes by hand. issubclass only
+        # accepts classes, hence the type check first (no duck typing
+        # here).
+
+        base = lorrycontroller.LorryControllerRoute
+        for name in dir(lorrycontroller):
+            candidate = getattr(lorrycontroller, name)
+            if type(candidate) != type:
+                continue
+            if not issubclass(candidate, base):
+                continue
+            if candidate == base:
+                continue
+            yield candidate
+
+    def process_args(self, args):
+        '''Set up all routes on a bottle application and serve it.
+
+        The statedb setting is mandatory. Runs the WSGI server if the
+        wsgi setting is true, and the debug server otherwise.
+
+        '''
+
+        self.settings.require('statedb')
+
+        self.setup_proxy()
+
+        page_templates = self.load_templates()
+
+        app = bottle.Bottle()
+
+        # Each route object carries its own URL path and HTTP method.
+        for route_class in self.find_routes():
+            handler = route_class(self.settings, page_templates)
+            app.route(
+                path=handler.path,
+                method=handler.http_method,
+                callback=handler.run)
+
+        logging.info('Starting server')
+        if not self.settings['wsgi']:
+            self.run_debug_server(app)
+        else:
+            self.run_wsgi_server(app)
+
+    def load_templates(self):
+        '''Return a dict of template texts from the templates directory.
+
+        Only files named *.tpl are read. The dict key is the filename
+        without the .tpl suffix.
+
+        '''
+
+        suffix = '.tpl'
+        loaded = {}
+        for filename in os.listdir(self.settings['templates']):
+            if not filename.endswith(suffix):
+                continue
+            key = filename[:-len(suffix)]
+            full_path = os.path.join(self.settings['templates'], filename)
+            with open(full_path) as f:
+                loaded[key] = f.read()
+        return loaded
+
+    def run_wsgi_server(self, webapp):
+        '''Serve the application using flup's FastCGI WSGIServer.'''
+        server = WSGIServer(webapp)
+        server.run()
+
+    def run_debug_server(self, webapp):
+        '''Run the debug server on the given port, or on a random one.
+
+        A debug-port setting of zero (the default) selects a random
+        port.
+
+        '''
+
+        if not self.settings['debug-port']:
+            self.run_debug_server_on_random_port(webapp)
+        else:
+            self.run_debug_server_on_given_port(webapp)
+
+    def run_debug_server_on_given_port(self, webapp):
+        '''Run bottle's server on the debug-host and debug-port settings.'''
+        options = {
+            'host': self.settings['debug-host'],
+            'port': self.settings['debug-port'],
+            'quiet': True,
+            'debug': True,
+        }
+        bottle.run(webapp, **options)
+
+    def run_debug_server_on_random_port(self, webapp):
+        '''Run bottle's server on a port chosen at bind time.
+
+        The port number is written to the file named by the
+        debug-port-file setting.
+
+        '''
+
+        # Looked up here so that the nested class can use it as a
+        # closure variable.
+        server_port_file = self.settings['debug-port-file']
+
+        class DebugServer(wsgiref.simple_server.WSGIServer):
+            '''WSGI server that listens on an ephemeral port.
+
+            Rather than use a specified port, or default, the
+            DebugServer binds to port 0 and writes the resulting
+            port number to debug-port-file, so a non-racy temporary
+            port can be used.
+
+            '''
+
+            # The port passed in by the caller is deliberately ignored
+            # and replaced with 0.
+            def __init__(self, (host, port), *args, **kwargs):
+                wsgiref.simple_server.WSGIServer.__init__(
+                    self, (host, 0), *args, **kwargs)
+                with open(server_port_file, 'w') as f:
+                    f.write(str(self.server_port) + '\n')
+
+        bottle.run(
+            webapp,
+            host=self.settings['debug-host'],
+            server_class=DebugServer,
+            quiet=True,
+            debug=True)
+
+    def setup_proxy(self):
+        """Set up HTTP proxying from proxy.conf, if configured.
+
+        The proxy.conf file is looked up in the configuration
+        directory. All the actual work is delegated to
+        lorrycontroller.setup_proxy.
+
+        """
+
+        conf_dir = self.settings['configuration-directory']
+        lorrycontroller.setup_proxy(os.path.join(conf_dir, 'proxy.conf'))
+
+
+WEBAPP().run()
diff --git a/lorry-controller.morph b/lorry-controller.morph
new file mode 100644
index 0000000..9f1623a
--- /dev/null
+++ b/lorry-controller.morph
@@ -0,0 +1,14 @@
+build-system: python-distutils
+kind: chunk
+name: lorry-controller
+post-install-commands:
+# "install -d" treats every operand as a directory to create, so the
+# mode must be given with -m; a bare "0755" operand would be created
+# as a directory of that name in the build directory.
+- install -d -m 0755 "$DESTDIR/etc/lighttpd"
+- install -m 0644 -D etc/lighttpd/*.conf "$DESTDIR/etc/lighttpd/."
+- |
+  TGT="$DESTDIR/usr/lib/systemd/system"
+  install -d "$TGT/multi-user.target.wants"
+  install -m 0644 units/*.service units/*.timer "$TGT/."
+  cd "$TGT/multi-user.target.wants"
+  # Ignore template units for symlinking. The template units will be
+  # instantiated at deploy time by trove.configure.
+  ln -s ../*[^@].service ../*[^@].timer .
diff --git a/lorrycontroller/__init__.py b/lorrycontroller/__init__.py
new file mode 100644
index 0000000..9dd6496
--- /dev/null
+++ b/lorrycontroller/__init__.py
@@ -0,0 +1,44 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from statedb import (
+ StateDB,
+ LorryNotFoundError,
+ WrongNumberLorriesRunningJob,
+ TroveNotFoundError)
+from route import LorryControllerRoute
+from readconf import ReadConfiguration
+from status import Status, StatusHTML, StatusRenderer
+from listqueue import ListQueue
+from showlorry import ShowLorry, ShowLorryHTML
+from startstopqueue import StartQueue, StopQueue
+from givemejob import GiveMeJob
+from jobupdate import JobUpdate
+from listrunningjobs import ListRunningJobs
+from movetopbottom import MoveToTop, MoveToBottom
+from stopjob import StopJob
+from listjobs import ListAllJobs, ListAllJobsHTML
+from showjob import ShowJob, ShowJobHTML, JobShower
+from removejob import RemoveJob
+from lstroves import LsTroves, ForceLsTrove
+from pretendtime import PretendTime
+from maxjobs import GetMaxJobs, SetMaxJobs
+from gitano import GitanoCommand, GitanoCommandFailure
+from static import StaticFile
+from proxy import setup_proxy
+
+
+# __all__ must be a sequence of names. A dict, which is what locals()
+# returns, cannot be indexed and makes "from lorrycontroller import *"
+# fail. dir() is evaluated once, before the generator runs, so nothing
+# is modified while it is being iterated.
+__all__ = list(n for n in dir() if not n.startswith('_'))
diff --git a/lorrycontroller/gitano.py b/lorrycontroller/gitano.py
new file mode 100644
index 0000000..b2c9123
--- /dev/null
+++ b/lorrycontroller/gitano.py
@@ -0,0 +1,130 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import collections
+import logging
+import re
+import urllib2
+import urlparse
+
+import cliapp
+
+import lorrycontroller
+
+
+class GitanoCommandFailure(Exception):
+
+    '''A Gitano command could not be run on a Trove.'''
+
+    def __init__(self, trovehost, command, stderr):
+        message = 'Failed to run "%s" on Gitano on %s\n%s' % (
+            command, trovehost, stderr)
+        Exception.__init__(self, message)
+
+
+class GitanoCommand(object):
+
+    '''Run a Gitano command on a Trove.
+
+    Commands are run over ssh, or sent to /gitano-command.cgi on the
+    Trove over http or https, depending on ``protocol``. Any other
+    protocol raises GitanoCommandFailure.
+
+    '''
+
+    def __init__(self, trovehost, protocol, username, password):
+        self.trovehost = trovehost
+        self.protocol = protocol
+        self.username = username
+        self.password = password
+
+        if protocol == 'ssh':
+            self._command = self._ssh_command
+        elif protocol in ('http', 'https'):
+            self._command = self._http_command
+        else:
+            raise GitanoCommandFailure(
+                self.trovehost, '__init__', 'unknown protocol %s' % protocol)
+
+    def whoami(self):
+        '''Return the output of the "whoami" command.'''
+        return self._command(['whoami'])
+
+    def create(self, repo_path):
+        '''Run the "create" command for repo_path.'''
+        self._command(['create', repo_path])
+
+    def get_gitano_config(self, repo_path):
+        '''Return the Gitano configuration of a repository.
+
+        The result is a collections.defaultdict mapping each key to
+        its value, both as strings. Keys that are not set map to the
+        empty string.
+
+        '''
+
+        stdout = self._command(['config', repo_path, 'show'])
+
+        # "config REPO show" outputs a sequence of lines of the form
+        # "key: value". Extract those into a collections.defaultdict.
+        # The key is everything up to the first colon, so the "+" has
+        # to be inside the group; the value may itself contain colons.
+
+        result = collections.defaultdict(str)
+        for line in stdout.splitlines():
+            m = re.match(r'^([^:]+):\s*(.*)$', line)
+            if m:
+                result[m.group(1).strip()] = m.group(2).strip()
+
+        return result
+
+    def set_gitano_config(self, path, key, value):
+        '''Run "config PATH set KEY VALUE".'''
+        self._command(['config', path, 'set', key, value])
+
+    def ls(self):
+        '''Return the raw output of the "ls" command.'''
+        return self._command(['ls'])
+
+    def _ssh_command(self, gitano_args):
+        '''Run a Gitano command over ssh and return its stdout.
+
+        Raise GitanoCommandFailure if ssh exits with a non-zero status.
+
+        '''
+
+        # The remote side gets the arguments as one command line, so
+        # each one is shell-quoted.
+        quoted_args = [cliapp.shell_quote(x) for x in gitano_args]
+
+        base_argv = [
+            'ssh',
+            '-oStrictHostKeyChecking=no',
+            '-oBatchMode=yes',
+            'git@%s' % self.trovehost,
+        ]
+
+        exit_code, stdout, stderr = cliapp.runcmd_unchecked(
+            base_argv + quoted_args)
+
+        if exit_code != 0:
+            # The format string has three placeholders, so the command
+            # itself must be passed as well as the host and the output.
+            logging.error(
+                'Failed to run "%s" for %s:\n%s',
+                ' '.join(gitano_args), self.trovehost, stdout + stderr)
+            raise GitanoCommandFailure(
+                self.trovehost,
+                ' '.join(gitano_args),
+                stdout + stderr)
+
+        return stdout
+
+    def _http_command(self, gitano_args):
+        '''Run a Gitano command via the Trove's CGI and return the body.
+
+        HTTP basic authentication is used if both a username and a
+        password were given. Raise GitanoCommandFailure if the URL
+        cannot be opened.
+
+        '''
+
+        quoted_args = urllib2.quote(' '.join(gitano_args))
+        url = urlparse.urlunsplit((
+            self.protocol,
+            self.trovehost,
+            '/gitano-command.cgi',
+            'cmd=%s' % quoted_args,
+            ''))
+        logging.debug('url=%r', url)
+
+        try:
+            request = urllib2.Request(url, None, {})
+            logging.debug('request=%r', request.get_full_url())
+            if self.username and self.password:
+                password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
+                password_mgr.add_password(
+                    None, url, self.username, self.password)
+                auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
+                opener = urllib2.build_opener(auth_handler)
+                response = opener.open(url)
+            else:
+                response = urllib2.urlopen(request)
+        except urllib2.URLError as e:
+            raise GitanoCommandFailure(
+                self.trovehost, ' '.join(gitano_args), str(e))
+
+        return response.read()
diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py
new file mode 100644
index 0000000..43abcc8
--- /dev/null
+++ b/lorrycontroller/givemejob.py
@@ -0,0 +1,130 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import collections
+import logging
+import re
+import time
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+class GiveMeJob(lorrycontroller.LorryControllerRoute):
+
+ http_method = 'POST'
+ path = '/1.0/give-me-job'
+
+    def run(self, **kwargs):
+        '''Give the requesting MINION the first lorry that is ready to run.
+
+        Returns the lorry's info, with the new job id in 'job_id', or
+        { 'job_id': None } if the queue is stopped, the maximum number
+        of jobs is already running, or no lorry is ready.
+
+        '''
+
+        logging.info('%s %s called', self.http_method, self.path)
+
+        # These checks are made on a separate statedb handle, before
+        # the "with statedb" block below is entered.
+        readdb = self.open_statedb()
+        if readdb.get_running_queue() and not self.max_jobs_reached(readdb):
+            statedb = self.open_statedb()
+            with statedb:
+                lorry_infos = statedb.get_all_lorries_info()
+                now = statedb.get_current_time()
+                for lorry_info in lorry_infos:
+                    if self.ready_to_run(lorry_info, now):
+                        self.create_repository_in_local_trove(
+                            statedb, lorry_info)
+                        # Only lorries that come from a remote Trove
+                        # have metadata to copy.
+                        if lorry_info['from_trovehost']:
+                            self.copy_repository_metadata(statedb, lorry_info)
+                        self.give_job_to_minion(statedb, lorry_info, now)
+                        logging.info(
+                            'Giving job %s to lorry %s to MINION %s:%s',
+                            lorry_info['job_id'],
+                            lorry_info['path'],
+                            bottle.request.forms.host,
+                            bottle.request.forms.pid)
+                        return lorry_info
+
+        logging.info('No job to give MINION')
+        return { 'job_id': None }
+
+    def max_jobs_reached(self, statedb):
+        '''Return True if at least max_jobs jobs are running.
+
+        A max_jobs of None means there is no limit.
+
+        '''
+
+        limit = statedb.get_max_jobs()
+        if limit is not None:
+            return len(statedb.get_running_jobs()) >= limit
+        return False
+
+    def ready_to_run(self, lorry_info, now):
+        '''Return True if the lorry has no running job and is due.
+
+        A lorry is due when last_run plus interval is not later than
+        ``now``.
+
+        '''
+
+        next_run = lorry_info['last_run'] + lorry_info['interval']
+        if lorry_info['running_job'] is not None:
+            return False
+        return next_run <= now
+
+    def create_repository_in_local_trove(self, statedb, lorry_info):
+        '''Try to create the lorry's repository on the local Trove.'''
+
+        # If creation fails, we assume that is because the repository
+        # exists already, so the failure is logged and otherwise
+        # ignored.
+
+        local_trove = lorrycontroller.GitanoCommand(
+            'localhost', 'ssh', None, None)
+        try:
+            local_trove.create(lorry_info['path'])
+        except lorrycontroller.GitanoCommandFailure as e:
+            logging.debug(
+                'Ignoring error creating %s on local Trove: %s',
+                lorry_info['path'], e)
+            return
+        logging.info('Created %s on local repo', lorry_info['path'])
+
+    def copy_repository_metadata(self, statedb, lorry_info):
+        '''Copy project.head and project.description to the local Trove.
+
+        project.head is copied whenever it differs from the remote
+        one. project.description is only set if the local one is
+        empty, and is prefixed with the name of the remote Trove.
+        Gitano failures are logged, and do not fail the request.
+
+        '''
+
+        assert lorry_info['from_trovehost']
+        assert lorry_info['from_path']
+
+        remote = self.new_gitano_command(statedb, lorry_info['from_trovehost'])
+        local = lorrycontroller.GitanoCommand('localhost', 'ssh', None, None)
+
+        try:
+            remote_config = remote.get_gitano_config(lorry_info['from_path'])
+            local_config = local.get_gitano_config(lorry_info['path'])
+
+            if remote_config['project.head'] != local_config['project.head']:
+                local.set_gitano_config(
+                    lorry_info['path'],
+                    'project.head',
+                    remote_config['project.head'])
+
+            if not local_config['project.description']:
+                desc = '{host}: {desc}'.format(
+                    host=lorry_info['from_trovehost'],
+                    desc=remote_config['project.description'])
+                local.set_gitano_config(
+                    lorry_info['path'],
+                    'project.description',
+                    desc)
+        except lorrycontroller.GitanoCommandFailure as e:
+            # FIXME: We need a good way to report such errors, but we
+            # probably don't want to fail the request (e.g., with
+            # bottle.abort(500)), so for now the error is only logged.
+            logging.error('ERROR: %s', str(e))
+
+    def give_job_to_minion(self, statedb, lorry_info, now):
+        '''Allocate a new job for the lorry and record it in statedb.
+
+        The MINION's host and pid are taken from the form data of the
+        current request. The new job id is stored in
+        lorry_info['job_id'], and lorry_info is returned.
+
+        '''
+
+        lorry_path = lorry_info['path']
+        host = bottle.request.forms.host
+        pid = bottle.request.forms.pid
+
+        job_id = statedb.get_next_job_id()
+        statedb.set_running_job(lorry_path, job_id)
+        statedb.add_new_job(job_id, host, pid, lorry_path, int(now))
+
+        lorry_info['job_id'] = job_id
+        return lorry_info
diff --git a/lorrycontroller/jobupdate.py b/lorrycontroller/jobupdate.py
new file mode 100644
index 0000000..b6ee1fe
--- /dev/null
+++ b/lorrycontroller/jobupdate.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class JobUpdate(lorrycontroller.LorryControllerRoute):
+
+ http_method = 'POST'
+ path = '/1.0/job-update'
+
+    def run(self, **kwargs):
+        '''Record a job update posted to /1.0/job-update.
+
+        Reads the form fields job_id, exit, stdout, stderr and
+        disk_usage. Returns the info of the lorry running the job, as
+        stored after the update.
+
+        '''
+
+        logging.info('%s %s called', self.http_method, self.path)
+
+        job_id = int(bottle.request.forms.job_id)
+        exit = bottle.request.forms.exit
+        stdout = bottle.request.forms.stdout
+        stderr = bottle.request.forms.stderr
+        disk_usage = bottle.request.forms.disk_usage
+
+        logging.info('Job %s updated (exit=%s)', job_id, exit)
+
+        statedb = self.open_statedb()
+        with statedb:
+            if stdout:
+                statedb.append_to_job_output(job_id, stdout)
+            if stderr:
+                statedb.append_to_job_output(job_id, stderr)
+
+            path = statedb.find_lorry_running_job(job_id)
+            lorry_info = statedb.get_lorry_info(path)
+
+            # Any exit value other than 'no' means the job has ended.
+            # NOTE(review): bottle presumably gives '' rather than None
+            # for a missing form field, in which case a request without
+            # "exit" is treated as a finished job - confirm.
+            if exit is not None and exit != 'no':
+                now = statedb.get_current_time()
+                statedb.set_lorry_last_run(path, int(now))
+                statedb.set_running_job(path, None)
+                statedb.set_job_exit(job_id, exit, int(now), disk_usage)
+                statedb.set_lorry_disk_usage(path, disk_usage)
+            elif self.time_to_die(statedb, job_id, lorry_info):
+                logging.warning(
+                    'Job %r has been running too long, '
+                    'marking it to be exterminated', job_id)
+                statedb.set_kill_job(path, True)
+
+            # Read the info again, so the response reflects the
+            # changes made above.
+            obj = statedb.get_lorry_info(path)
+            logging.debug('obj=%r', obj)
+            return obj
+
+    def time_to_die(self, statedb, job_id, lorry_info):
+        '''Return True if the job has run for at least lorry_timeout.'''
+
+        started, ended = statedb.get_job_started_and_ended(job_id)
+        lorry_timeout = lorry_info['lorry_timeout']
+        now = statedb.get_current_time()
+        age = now - started
+
+        debug_values = [
+            ('started', started),
+            ('ended', ended),
+            ('lorry_timeout', lorry_timeout),
+            ('now', now),
+            ('age', age),
+        ]
+        for label, value in debug_values:
+            logging.debug(label + '=%r', value)
+
+        return age >= lorry_timeout
diff --git a/lorrycontroller/listjobs.py b/lorrycontroller/listjobs.py
new file mode 100644
index 0000000..eaffeef
--- /dev/null
+++ b/lorrycontroller/listjobs.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class ListAllJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route returning the ids of all jobs known to statedb.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-jobs'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        job_ids = self.open_statedb().get_job_ids()
+        return {'job_ids': job_ids}
+
+
+class ListAllJobsHTML(lorrycontroller.LorryControllerRoute):
+
+    '''Route rendering all jobs with the list-jobs template.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-jobs-html'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        statedb = self.open_statedb()
+        now = statedb.get_current_time()
+        job_infos = self.get_jobs(statedb)
+        timestamp = time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now))
+        return bottle.template(
+            self._templates['list-jobs'],
+            job_infos=job_infos,
+            timestamp=timestamp)
+
+    def get_jobs(self, statedb):
+        '''Return a list of dicts with job_id, exit and path of each job.
+
+        The exit value is 'no' for a job that has no exit code yet.
+
+        '''
+
+        def describe(job_id):
+            exit_code = statedb.get_job_exit(job_id)
+            exit_text = str(exit_code) if exit_code is not None else 'no'
+            return {
+                'job_id': job_id,
+                'exit': exit_text,
+                'path': statedb.get_job_path(job_id),
+            }
+
+        return [describe(job_id) for job_id in statedb.get_job_ids()]
diff --git a/lorrycontroller/listqueue.py b/lorrycontroller/listqueue.py
new file mode 100644
index 0000000..5d68b83
--- /dev/null
+++ b/lorrycontroller/listqueue.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import lorrycontroller
+
+
+class ListQueue(lorrycontroller.LorryControllerRoute):
+
+    '''Route listing the path of every lorry.
+
+    The paths are in the order statedb.get_all_lorries_info returns
+    the lorries.
+
+    '''
+
+    http_method = 'GET'
+    path = '/1.0/list-queue'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+        lorry_infos = self.open_statedb().get_all_lorries_info()
+        paths = [lorry_info['path'] for lorry_info in lorry_infos]
+        return {'queue': paths}
diff --git a/lorrycontroller/listrunningjobs.py b/lorrycontroller/listrunningjobs.py
new file mode 100644
index 0000000..1f44743
--- /dev/null
+++ b/lorrycontroller/listrunningjobs.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import lorrycontroller
+
+
+class ListRunningJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route returning what statedb.get_running_jobs reports.'''
+
+    http_method = 'GET'
+    path = '/1.0/list-running-jobs'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        running = self.open_statedb().get_running_jobs()
+        return {'running_jobs': running}
diff --git a/lorrycontroller/lstroves.py b/lorrycontroller/lstroves.py
new file mode 100644
index 0000000..1f10209
--- /dev/null
+++ b/lorrycontroller/lstroves.py
@@ -0,0 +1,217 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import logging
+import time
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+class GitanoLsError(Exception):
+
+    '''Listing the repositories on a remote Trove failed.'''
+
+    def __init__(self, trovehost, output):
+        message = (
+            'Failed to get list of git repositories '
+            'on remote host %s:\n%s' % (trovehost, output))
+        Exception.__init__(self, message)
+        self.trovehost = trovehost
+
+
+class TroveRepositoryLister(object):
+
+    '''List the repositories on a Trove and record them as lorries.
+
+    ``app_settings`` is used for the debug-fake-trove setting, and
+    ``route`` for its new_gitano_command method.
+
+    '''
+
+    def __init__(self, app_settings, route):
+        self.app_settings = app_settings
+        self.route = route
+
+    def list_trove_into_statedb(self, statedb, trove_info):
+        '''Update the lorries in statedb from the Trove's repositories.
+
+        Also records the current time as the time of the Trove's
+        latest ls. Returns nothing.
+
+        '''
+
+        remote_paths = self.ls(statedb, trove_info)
+        remote_paths = self.skip_ignored_repos(trove_info, remote_paths)
+        repo_map = self.map_remote_repos_to_local_ones(
+            trove_info, remote_paths)
+
+        with statedb:
+            self.update_lorries_for_trove(statedb, trove_info, repo_map)
+            now = statedb.get_current_time()
+            statedb.set_trove_ls_last_run(trove_info['trovehost'], now)
+
+    def ls(self, statedb, trove_info):
+        '''Return the repository paths on the Trove, real or faked.'''
+        if self.app_settings['debug-fake-trove']:
+            repo_paths = self.get_fake_ls_output(trove_info)
+        else:
+            repo_paths = self.get_real_ls_output(statedb, trove_info)
+
+        return repo_paths
+
+    def get_fake_ls_output(self, trove_info):
+        '''Return the 'ls-output' value of the Trove's fake JSON file.
+
+        Returns None if debug-fake-trove has no entry for the Trove.
+
+        '''
+
+        trovehost = trove_info['trovehost']
+        for item in self.app_settings['debug-fake-trove']:
+            host, path = item.split('=', 1)
+            if host == trovehost:
+                with open(path) as f:
+                    obj = json.load(f)
+                    return obj['ls-output']
+        return None
+
+    def get_real_ls_output(self, statedb, trove_info):
+        '''Run "ls" on the Trove and return the parsed repository paths.'''
+        gitano = self.route.new_gitano_command(statedb, trove_info['trovehost'])
+        output = gitano.ls()
+        return self.parse_ls_output(output)
+
+    def parse_ls_output(self, ls_output):
+        '''Return the repository paths listed in the output of "ls".
+
+        A repository line has exactly two words, the first of which
+        starts with "R". All other lines, including empty ones, are
+        skipped.
+
+        '''
+
+        repo_paths = []
+        for line in ls_output.splitlines():
+            words = line.split()
+            # Check the number of words first: a blank line has no
+            # words[0] to look at.
+            if len(words) == 2 and words[0].startswith('R'):
+                repo_paths.append(words[1])
+        return repo_paths
+
+    def skip_ignored_repos(self, trovehost, repo_paths):
+        '''Return repo_paths without the paths on the Trove's ignore list.
+
+        Despite its name, ``trovehost`` is the trove info dict; its
+        'ignore' item is a JSON list of paths.
+
+        '''
+
+        ignored_paths = json.loads(trovehost['ignore'])
+        return [x for x in repo_paths if x not in ignored_paths]
+
+    def map_remote_repos_to_local_ones(self, trove_info, remote_paths):
+        '''Return a dict that maps each remote repo path to a local one.
+
+        Remote repositories that match no prefix in the Trove's
+        prefixmap are left out.
+
+        '''
+
+        prefixmap = self.parse_prefixmap(trove_info['prefixmap'])
+        repo_map = {}
+        for remote_path in remote_paths:
+            local_path = self.map_one_remote_repo_to_local_one(
+                remote_path, prefixmap)
+            if local_path:
+                repo_map[remote_path] = local_path
+            else:
+                logging.debug('Remote repo %r not in prefixmap', remote_path)
+        return repo_map
+
+    def parse_prefixmap(self, prefixmap_string):
+        '''Decode the prefixmap, which is stored as JSON text.'''
+        return json.loads(prefixmap_string)
+
+    def map_one_remote_repo_to_local_one(self, remote_path, prefixmap):
+        '''Return the local path for a remote path, or None if unmapped.'''
+        for remote_prefix in prefixmap:
+            if self.path_starts_with_prefix(remote_path, remote_prefix):
+                local_prefix = prefixmap[remote_prefix]
+                relative_path = remote_path[len(remote_prefix):]
+                local_path = local_prefix + relative_path
+                return local_path
+        return None
+
+    def path_starts_with_prefix(self, path, prefix):
+        '''Return True if prefix is followed by a slash in path.'''
+        return path.startswith(prefix) and path[len(prefix):].startswith('/')
+
+    def update_lorries_for_trove(self, statedb, trove_info, repo_map):
+        '''Add a lorry for each mapped repository, and remove stale ones.
+
+        Lorries of this Trove whose local path is no longer in
+        repo_map are removed from statedb.
+
+        '''
+
+        trovehost = trove_info['trovehost']
+        for remote_path, local_path in repo_map.items():
+            lorry = self.construct_lorry(trove_info, local_path, remote_path)
+            statedb.add_to_lorries(
+                path=local_path,
+                text=json.dumps(lorry, indent=4),
+                from_trovehost=trovehost,
+                from_path=remote_path,
+                interval=trove_info['lorry_interval'],
+                timeout=trove_info['lorry_timeout'])
+
+        all_local_paths = set(statedb.get_lorries_for_trove(trovehost))
+        wanted_local_paths = set(repo_map.values())
+        delete_local_paths = all_local_paths.difference(wanted_local_paths)
+        for local_path in delete_local_paths:
+            statedb.remove_lorry(local_path)
+
+    def construct_lorry(self, trove_info, local_path, remote_path):
+        '''Return a git lorry specification, as a dict, for one repository.'''
+        return {
+            local_path: {
+                'type': 'git',
+                'url': self.construct_lorry_url(trove_info, remote_path),
+                'refspecs': [
+                    "+refs/heads/*",
+                    "+refs/tags/*",
+                ],
+            }
+        }
+
+    def construct_lorry_url(self, trove_info, remote_path):
+        '''Return the URL of a remote repository, for the Trove's protocol.'''
+
+        # Every item of trove_info, plus remote_path, may be used in
+        # the URL patterns.
+        fields = dict(trove_info)
+        fields['remote_path'] = remote_path
+
+        patterns = {
+            'ssh': 'ssh://git@{trovehost}/{remote_path}',
+            'https':
+                'https://{username}:{password}@{trovehost}/git/{remote_path}',
+            'http': 'http://{trovehost}/git/{remote_path}',
+        }
+
+        return patterns[trove_info['protocol']].format(**fields)
+
+
+class ForceLsTrove(lorrycontroller.LorryControllerRoute):
+
+    '''Route listing the repositories of one Trove right away.
+
+    The Trove is named by the ``trovehost`` form field.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/force-ls-trove'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        trovehost = bottle.request.forms.trovehost
+
+        statedb = self.open_statedb()
+        lister = TroveRepositoryLister(self.app_settings, self)
+        trove_info = statedb.get_trove_info(trovehost)
+        try:
+            lister.list_trove_into_statedb(statedb, trove_info)
+        except GitanoLsError as e:
+            # bottle.abort raises by itself; no "raise" is needed.
+            bottle.abort(500, str(e))
+
+        # list_trove_into_statedb returns nothing, so report the Trove
+        # that was just listed, as a list of Trove hosts.
+        return { 'updated-troves': [trovehost] }
+
+
+class LsTroves(lorrycontroller.LorryControllerRoute):
+
+    '''Route listing the repositories of every Trove that is due an ls.'''
+
+    http_method = 'POST'
+    path = '/1.0/ls-troves'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        statedb = self.open_statedb()
+        lister = TroveRepositoryLister(self.app_settings, self)
+
+        due_troves = self.get_due_troves(statedb)
+        for due in due_troves:
+            logging.info('Trove %r is due an ls', due['trovehost'])
+            try:
+                lister.list_trove_into_statedb(statedb, due)
+            except GitanoLsError as e:
+                bottle.abort(500, str(e))
+
+        updated = [item['trovehost'] for item in due_troves]
+        return {'updated-troves': updated}
+
+    def get_due_troves(self, statedb):
+        '''Return the info of each Trove whose ls is due now.'''
+        all_infos = []
+        for trovehost in statedb.get_troves():
+            all_infos.append(statedb.get_trove_info(trovehost))
+        now = statedb.get_current_time()
+        due = []
+        for info in all_infos:
+            if self.is_due(info, now):
+                due.append(info)
+        return due
+
+    def is_due(self, trove_info, now):
+        '''Return True if ls_last_run plus ls_interval is not after now.'''
+        return trove_info['ls_last_run'] + trove_info['ls_interval'] <= now
diff --git a/lorrycontroller/maxjobs.py b/lorrycontroller/maxjobs.py
new file mode 100644
index 0000000..ce594c2
--- /dev/null
+++ b/lorrycontroller/maxjobs.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+class GetMaxJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route returning the max_jobs value stored in statedb.'''
+
+    http_method = 'GET'
+    path = '/1.0/get-max-jobs'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        limit = self.open_statedb().get_max_jobs()
+        return {'max_jobs': limit}
+
+
+class SetMaxJobs(lorrycontroller.LorryControllerRoute):
+
+    '''Route storing the ``max_jobs`` form field in statedb.
+
+    Returns the value as read back from statedb.
+
+    '''
+
+    http_method = 'POST'
+    path = '/1.0/set-max-jobs'
+
+    def run(self, **kwargs):
+        logging.info('%s %s called', self.http_method, self.path)
+
+        statedb = self.open_statedb()
+        # NOTE(review): the form value is passed on as received;
+        # presumably statedb stores it as a number - confirm, since
+        # the value is later compared with a job count.
+        requested = bottle.request.forms.max_jobs
+
+        with statedb:
+            statedb.set_max_jobs(requested)
+            stored = statedb.get_max_jobs()
+            return {'max_jobs': stored}
diff --git a/lorrycontroller/movetopbottom.py b/lorrycontroller/movetopbottom.py
new file mode 100644
index 0000000..dcb79a4
--- /dev/null
+++ b/lorrycontroller/movetopbottom.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+ class MoveToTop(lorrycontroller.LorryControllerRoute):
+
+     '''Route that moves one lorry to the top of the run-queue.'''
+
+     http_method = 'POST'
+     path = '/1.0/move-to-top'
+
+     def run(self, **kwargs):
+         '''Give the lorry named by the ``path`` form field an early last_run.
+
+         The new ``last_run`` is one less than that of the first lorry
+         returned by STATEDB, capped at zero. Nothing is changed when
+         STATEDB has no lorries, but the message is returned regardless.
+
+         '''
+         logging.info('%s %s called', self.http_method, self.path)
+         lorry_path = bottle.request.forms.path
+         with self.open_statedb() as statedb:
+             queue = statedb.get_all_lorries_info()
+             if queue:
+                 first_last_run = queue[0]['last_run']
+                 statedb.set_lorry_last_run(
+                     lorry_path, min(0, first_last_run - 1))
+         return 'Lorry %s moved to top of run-queue' % lorry_path
+
+
+ class MoveToBottom(lorrycontroller.LorryControllerRoute):
+
+     '''Route that moves one lorry to the bottom of the run-queue.'''
+
+     http_method = 'POST'
+     path = '/1.0/move-to-bottom'
+
+     def run(self, **kwargs):
+         '''Give the lorry named by the ``path`` form field a late last_run.
+
+         The new ``last_run`` is one second past the point where the
+         last lorry returned by STATEDB becomes due (its ``last_run``
+         plus its ``interval``). Nothing is changed when STATEDB has no
+         lorries, but the message is returned regardless.
+
+         '''
+         logging.info('%s %s called', self.http_method, self.path)
+         path = bottle.request.forms.path
+         statedb = self.open_statedb()
+         with statedb:
+             lorry_infos = statedb.get_all_lorries_info()
+             if lorry_infos:
+                 bottommost = lorry_infos[-1]
+                 timestamp = (
+                     bottommost['last_run'] + bottommost['interval'] + 1)
+                 statedb.set_lorry_last_run(path, timestamp)
+         return 'Lorry %s moved to bottom of run-queue' % path
diff --git a/lorrycontroller/pretendtime.py b/lorrycontroller/pretendtime.py
new file mode 100644
index 0000000..3fd1a70
--- /dev/null
+++ b/lorrycontroller/pretendtime.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import errno
+import glob
+import json
+import logging
+import os
+import re
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+ class PretendTime(lorrycontroller.LorryControllerRoute):
+
+     '''Route that sets the pretended current time held in STATEDB.'''
+
+     http_method = 'POST'
+     path = '/1.0/pretend-time'
+
+     def run(self, **kwargs):
+         '''Store the ``now`` form field as STATEDB's pretended time.'''
+         logging.info('%s %s called', self.http_method, self.path)
+
+         pretended_now = bottle.request.forms.now
+
+         with self.open_statedb() as statedb:
+             statedb.set_pretend_time(pretended_now)
diff --git a/lorrycontroller/proxy.py b/lorrycontroller/proxy.py
new file mode 100644
index 0000000..44749c9
--- /dev/null
+++ b/lorrycontroller/proxy.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import os
+import urllib
+import urllib2
+
+
+ def setup_proxy(config_filename):
+     """Tell urllib2 to use a proxy for http action by lorry-controller.
+
+     Load the proxy information from the JSON file named by
+     ``config_filename``, export it in the ``http_proxy`` and
+     ``https_proxy`` environment variables, and set urllib2's url
+     opener to open urls via the authenticated proxy.
+
+     The JSON object must have the keys ``hostname``, ``port``,
+     ``username`` and ``password``. If the file does not exist, nothing
+     is changed.
+
+     """
+
+     if not os.path.exists(config_filename):
+         return
+
+     with open(config_filename, 'r') as f:
+         proxy = json.load(f)
+
+     hostname = urllib.quote(proxy['hostname'])
+     user = '%s:%s' % (proxy['username'], proxy['password'])
+     url = '%s:%s' % (hostname, proxy['port'])
+     http_url = 'http://%s@%s' % (user, url)
+     https_url = 'https://%s@%s' % (user, url)
+
+     # set the required environment variables
+     os.environ['http_proxy'] = http_url
+     os.environ['https_proxy'] = https_url
+
+     # create a ProxyHandler; its dict is keyed by URL scheme name
+     # ("http", "https"), not by the environment variable names, or the
+     # handler never matches any request.
+     proxies = {'http': http_url,
+                'https': https_url}
+     proxy_handler = urllib2.ProxyHandler(proxies)
+
+     # install an opener to use the proxy
+     opener = urllib2.build_opener(proxy_handler)
+     urllib2.install_opener(opener)
diff --git a/lorrycontroller/readconf.py b/lorrycontroller/readconf.py
new file mode 100644
index 0000000..b6f7333
--- /dev/null
+++ b/lorrycontroller/readconf.py
@@ -0,0 +1,347 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import errno
+import glob
+import json
+import logging
+import os
+import re
+
+import bottle
+import cliapp
+
+import lorrycontroller
+
+
+ class LorryControllerConfParseError(Exception):
+
+     '''Describes a problem found while reading a configuration file.'''
+
+     def __init__(self, filename, exc):
+         message = 'ERROR reading %s: %s' % (filename, str(exc))
+         Exception.__init__(self, message)
+
+
+ class ReadConfiguration(lorrycontroller.LorryControllerRoute):
+
+     '''Route that reads the configuration and updates STATEDB to match.
+
+     Lorries and Troves described by the configuration are added to
+     (or updated in) STATEDB; lorries and Troves that STATEDB knows
+     about but that are no longer configured are removed from it.
+
+     '''
+
+     http_method = 'POST'
+     path = '/1.0/read-configuration'
+
+     DEFAULT_LORRY_TIMEOUT = 3600 # in seconds
+
+     def run(self, **kwargs):
+         '''Process the configuration; return a status or error message.
+
+         If the request form has a ``redirect`` field, redirect there
+         instead of returning the success message.
+
+         '''
+         logging.info('%s %s called', self.http_method, self.path)
+
+         self.get_confgit()
+
+         try:
+             conf_obj = self.read_config_file()
+         except LorryControllerConfParseError as e:
+             return str(e)
+
+         error = self.validate_config(conf_obj)
+         if error:
+             return 'ERROR: %s: %r' % (error, conf_obj)
+
+         self.fix_up_parsed_fields(conf_obj)
+
+         statedb = self.open_statedb()
+         with statedb:
+             # Start with everything STATEDB knows about. Entries that
+             # the configuration still accounts for are taken out of
+             # these sets; whatever is left at the end gets removed.
+             existing_lorries = set(statedb.get_lorries_paths())
+             existing_troves = set(statedb.get_troves())
+
+             for section in conf_obj:
+                 if not 'type' in section:
+                     return 'ERROR: no type field in section'
+                 if section['type'] == 'lorries':
+                     added = self.add_matching_lorries_to_statedb(
+                         statedb, section)
+                     existing_lorries = existing_lorries.difference(added)
+                 elif section['type'] in ('trove', 'troves'):
+                     self.add_trove(statedb, section)
+                     if section['trovehost'] in existing_troves:
+                         existing_troves.remove(section['trovehost'])
+                     existing_lorries = self.without_lorries_for_trovehost(
+                         statedb, existing_lorries, section['trovehost'])
+                 else:
+                     logging.error(
+                         'Unknown section in configuration: %r', section)
+                     return (
+                         'ERROR: Unknown section type in configuration: %r' %
+                         section)
+
+             for path in existing_lorries:
+                 statedb.remove_lorry(path)
+
+             for trovehost in existing_troves:
+                 statedb.remove_trove(trovehost)
+                 statedb.remove_lorries_for_trovehost(trovehost)
+
+
+         if 'redirect' in bottle.request.forms:
+             bottle.redirect(bottle.request.forms.redirect)
+
+         return 'Configuration has been updated.'
+
+     def without_lorries_for_trovehost(self, statedb, lorries, trovehost):
+         '''Return the set of ``lorries`` not belonging to ``trovehost``.'''
+         for_trovehost = statedb.get_lorries_for_trove(trovehost)
+         return set(x for x in lorries if x not in for_trovehost)
+
+     def get_confgit(self):
+         '''Clone or update CONFGIT, if the settings ask for a real one.'''
+         if self.app_settings['debug-real-confgit']:
+             confdir = self.app_settings['configuration-directory']
+             if not os.path.exists(confdir):
+                 self.git_clone_confgit(confdir)
+             else:
+                 self.git_pull_confgit(confdir)
+
+     def git_clone_confgit(self, confdir):
+         '''Clone the configured CONFGIT branch into ``confdir``.'''
+         url = self.app_settings['confgit-url']
+         branch = self.app_settings['confgit-branch']
+         logging.info('Cloning %s to %s', url, confdir)
+         cliapp.runcmd(['git', 'clone', '-b', branch, url, confdir])
+
+     def git_pull_confgit(self, confdir):
+         '''Run ``git pull`` in the existing CONFGIT checkout.'''
+         logging.info('Updating CONFGIT in %s', confdir)
+         cliapp.runcmd(['git', 'pull'], cwd=confdir)
+
+     @property
+     def config_file_name(self):
+         '''Full path of lorry-controller.conf in the configuration dir.'''
+         return os.path.join(
+             self.app_settings['configuration-directory'],
+             'lorry-controller.conf')
+
+     def read_config_file(self):
+         '''Read the configuration file, return as Python object.
+
+         A missing file counts as an empty configuration. Other I/O
+         errors abort the request with HTTP status 500, and invalid
+         JSON raises LorryControllerConfParseError.
+
+         '''
+
+         filename = self.config_file_name
+         logging.debug('Reading configuration file %s', filename)
+
+         try:
+             with open(filename) as f:
+                 return json.load(f)
+         except IOError as e:
+             if e.errno == errno.ENOENT:
+                 logging.debug(
+                     '%s: does not exist, returning empty config', filename)
+                 return []
+             bottle.abort(500, 'Error reading %s: %s' % (filename, e))
+         except ValueError as e:
+             logging.error('Error parsing configuration: %s', e)
+             raise LorryControllerConfParseError(filename, e)
+
+     def validate_config(self, obj):
+         '''Return an error message for ``obj``, or None if it is valid.'''
+         validator = LorryControllerConfValidator()
+         return validator.validate_config(obj)
+
+     def fix_up_parsed_fields(self, obj):
+         '''Replace the interval fields of every section with seconds.'''
+         for item in obj:
+             item['interval'] = self.fix_up_interval(item.get('interval'))
+             item['ls-interval'] = self.fix_up_interval(item.get('ls-interval'))
+
+     def fix_up_interval(self, value):
+         '''Convert an interval string such as "30m" into seconds.
+
+         The string is a number optionally followed by a unit: s, m, h
+         or d (in either case); no unit means seconds. A missing, empty
+         or unparseable value gives the default of one day.
+
+         '''
+         default_interval = 86400 # 1 day
+         if not value:
+             return default_interval
+         m = re.match(r'(\d+)\s*(s|m|h|d)?', value, re.I)
+         if not m:
+             return default_interval
+
+         number, factor = m.groups()
+         factors = {
+             's': 1,
+             'm': 60,
+             'h': 60*60,
+             'd': 60*60*24,
+             }
+         if factor is None:
+             factor = 's'
+         factor = factors.get(factor.lower(), 1)
+         return int(number) * factor
+
+     def add_matching_lorries_to_statedb(self, statedb, section):
+         '''Add the lorries matched by a "lorries" section to STATEDB.
+
+         Return the set of repository paths that were added.
+
+         '''
+         logging.debug('Adding matching lorries to STATEDB')
+
+         added_paths = set()
+
+         filenames = self.find_lorry_files_for_section(section)
+         logging.debug('filenames=%r', filenames)
+         lorry_specs = []
+         for filename in sorted(filenames):
+             logging.debug('Reading .lorry: %s', filename)
+             for subpath, obj in self.get_valid_lorry_specs(filename):
+                 self.add_refspecs_if_missing(obj)
+                 lorry_specs.append((subpath, obj))
+
+         for subpath, obj in sorted(lorry_specs):
+             path = self.deduce_repo_path(section, subpath)
+             text = self.serialise_lorry_spec(path, obj)
+             interval = section['interval']
+             timeout = section.get(
+                 'lorry-timeout', self.DEFAULT_LORRY_TIMEOUT)
+
+             statedb.add_to_lorries(
+                 path=path, text=text, from_trovehost='', from_path='',
+                 interval=interval, timeout=timeout)
+
+             added_paths.add(path)
+
+         return added_paths
+
+     def find_lorry_files_for_section(self, section):
+         '''Expand the section's globs, relative to the config file's dir.'''
+         result = []
+         dirname = os.path.dirname(self.config_file_name)
+         for base_pattern in section['globs']:
+             pattern = os.path.join(dirname, base_pattern)
+             result.extend(glob.glob(pattern))
+         return result
+
+     def get_valid_lorry_specs(self, filename):
+         '''Return (name, spec) pairs for the usable specs in a .lorry file.'''
+
+         # We do some basic validation of the .lorry file and the Lorry
+         # specs contained within it. We silently ignore anything that
+         # doesn't look OK. We don't have a reasonable mechanism to
+         # communicate any problems to the user, but we do log them to
+         # the log file.
+
+         try:
+             with open(filename) as f:
+                 obj = json.load(f)
+         except ValueError as e:
+             logging.error('JSON problem in %s', filename)
+             return []
+
+         if type(obj) != dict:
+             logging.error('%s: does not contain a dict', filename)
+             return []
+
+         items = []
+         for key in obj:
+             if type(obj[key]) != dict:
+                 logging.error(
+                     '%s: key %s does not map to a dict', filename, key)
+                 continue
+
+             if 'type' not in obj[key]:
+                 logging.error(
+                     '%s: key %s does not have type field', filename, key)
+                 continue
+
+             logging.debug('Happy with Lorry spec %r: %r', key, obj[key])
+             items.append((key, obj[key]))
+
+         return items
+
+     def add_refspecs_if_missing(self, obj):
+         '''Give the spec the default heads+tags refspecs if it has none.'''
+         if 'refspecs' not in obj:
+             obj['refspecs'] = [
+                 '+refs/heads/*',
+                 '+refs/tags/*',
+                 ]
+
+     def deduce_repo_path(self, section, subpath):
+         '''Return the repository path: the section prefix plus subpath.'''
+         return '%s/%s' % (section['prefix'], subpath)
+
+     def serialise_lorry_spec(self, path, obj):
+         '''Return JSON text for a one-entry dict mapping path to spec.'''
+         new_obj = { path: obj }
+         return json.dumps(new_obj, indent=4)
+
+     def add_trove(self, statedb, section):
+         '''Add or update the Trove described by a troves section.'''
+         username = None
+         password = None
+         if 'auth' in section:
+             auth = section['auth']
+             username = auth.get('username')
+             password = auth.get('password')
+
+         statedb.add_trove(
+             trovehost=section['trovehost'],
+             protocol=section['protocol'],
+             username=username,
+             password=password,
+             lorry_interval=section['interval'],
+             lorry_timeout=section.get(
+                 'lorry-timeout', self.DEFAULT_LORRY_TIMEOUT),
+             ls_interval=section['ls-interval'],
+             prefixmap=json.dumps(section['prefixmap']),
+             # The validator does not require an "ignore" field, so
+             # treat a missing one as "ignore nothing".
+             ignore=json.dumps(section.get('ignore', [])))
+
+
+ class LorryControllerConfValidator(object):
+
+     '''Checks the overall structure of a parsed configuration object.'''
+
+     def validate_config(self, conf_obj):
+         '''Return None if ``conf_obj`` is acceptable, else an error text.'''
+         try:
+             self._check_is_list(conf_obj)
+             self._check_is_list_of_dicts(conf_obj)
+
+             for section in conf_obj:
+                 if 'type' not in section:
+                     raise ValidationError(
+                         'section without type: %r' % section)
+
+                 section_type = section['type']
+                 if section_type == 'lorries':
+                     self._check_lorries_section(section)
+                 elif section_type == 'trove' or section_type == 'troves':
+                     self._check_troves_section(section)
+                 else:
+                     raise ValidationError(
+                         'unknown section type %r' % section_type)
+         except ValidationError as e:
+             return str(e)
+
+         return None
+
+     def _check_is_list(self, conf_obj):
+         '''Insist that the top-level object is exactly a list.'''
+         actual_type = type(conf_obj)
+         if actual_type is not list:
+             raise ValidationError(
+                 'type %r is not a JSON list' % actual_type)
+
+     def _check_is_list_of_dicts(self, conf_obj):
+         '''Insist that every item in the list is exactly a dict.'''
+         if any(type(item) is not dict for item in conf_obj):
+             raise ValidationError('all items must be dicts')
+
+     def _check_troves_section(self, section):
+         '''Check the mandatory fields and prefixmap of a troves section.'''
+         required = [
+             'trovehost', 'protocol', 'interval', 'ls-interval', 'prefixmap']
+         self._check_has_required_fields(section, required)
+         self._check_prefixmap(section)
+
+     def _check_prefixmap(self, section):
+         '''Placeholder: the prefixmap is not checked yet.'''
+         # FIXME: We should be checking the prefixmap for things like
+         # mapping to a prefix that starts with the local Trove ID, but
+         # since we don't have easy access to that, we don't do that
+         # yet. This should be fixed later.
+         pass
+
+     def _check_lorries_section(self, section):
+         '''Check the mandatory fields of a lorries section.'''
+         required = ['interval', 'prefix', 'globs']
+         self._check_has_required_fields(section, required)
+
+     def _check_has_required_fields(self, section, fields):
+         '''Complain about the first of ``fields`` missing from section.'''
+         missing = [field for field in fields if field not in section]
+         if missing:
+             raise ValidationError(
+                 'mandatory field %s missing in section %r' %
+                 (missing[0], section))
+
+
+ class ValidationError(Exception):
+
+     '''Signals that a configuration object failed a validation check.'''
+
+     def __init__(self, msg):
+         super(ValidationError, self).__init__(msg)
diff --git a/lorrycontroller/removejob.py b/lorrycontroller/removejob.py
new file mode 100644
index 0000000..5de65ba
--- /dev/null
+++ b/lorrycontroller/removejob.py
@@ -0,0 +1,44 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+ class RemoveJob(lorrycontroller.LorryControllerRoute):
+
+     '''Route that removes a job from STATEDB unless it is still running.'''
+
+     http_method = 'POST'
+     path = '/1.0/remove-job'
+
+     def run(self, **kwargs):
+         '''Remove the job named by the ``job_id`` form field.
+
+         Return a dict with the removed ``job_id``, or with ``job_id``
+         set to None and a ``reason`` if a lorry is still running it.
+
+         '''
+         logging.info('%s %s called', self.http_method, self.path)
+
+         job_id = bottle.request.forms.job_id
+
+         with self.open_statedb() as statedb:
+             # The lookup raises when no lorry is running the job, and
+             # that is the only case where removal is allowed.
+             still_running = True
+             try:
+                 statedb.find_lorry_running_job(job_id)
+             except lorrycontroller.WrongNumberLorriesRunningJob:
+                 still_running = False
+
+             if still_running:
+                 return { 'job_id': None, 'reason': 'still running' }
+
+             statedb.remove_job(job_id)
+             return { 'job_id': job_id }
diff --git a/lorrycontroller/route.py b/lorrycontroller/route.py
new file mode 100644
index 0000000..1eb4e5b
--- /dev/null
+++ b/lorrycontroller/route.py
@@ -0,0 +1,53 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import lorrycontroller
+
+
+ class LorryControllerRoute(object):
+
+     '''Common ancestor of all Lorry Controller HTTP API routes.
+
+     In Bottle terms a route is an HTTP request that is answered by a
+     particular callback. Each callback is written as a subclass of
+     this class, which makes the routes simpler to implement and lets
+     them be registered with a Bottle instance automatically.
+
+     Every subclass MUST set the attributes ``http_method`` and
+     ``path``; they are passed to bottle.Bottle.route as its ``method``
+     and ``path`` arguments, respectively.
+
+     '''
+
+     def __init__(self, app_settings, templates):
+         self._statedb = None
+         self._templates = templates
+         self.app_settings = app_settings
+
+     def open_statedb(self):
+         '''Return a new StateDB for the file named by the statedb setting.'''
+         statedb_filename = self.app_settings['statedb']
+         return lorrycontroller.StateDB(statedb_filename)
+
+     def new_gitano_command(self, statedb, trovehost):
+         '''Return a GitanoCommand built from STATEDB's info on trovehost.'''
+         info = statedb.get_trove_info(trovehost)
+         connection_details = [
+             info[key] for key in ('protocol', 'username', 'password')]
+         return lorrycontroller.GitanoCommand(trovehost, *connection_details)
+
+     def run(self, **kwargs):
+         '''Handle the request; subclasses must override this.'''
+         raise NotImplementedError()
diff --git a/lorrycontroller/showjob.py b/lorrycontroller/showjob.py
new file mode 100644
index 0000000..6f73ed6
--- /dev/null
+++ b/lorrycontroller/showjob.py
@@ -0,0 +1,83 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+ class JobShower(object):
+
+     '''Collects and formats the STATEDB information about one job.'''
+
+     def get_job_as_json(self, statedb, job_id):
+         '''Return a dict describing job ``job_id``, ready for JSON output.
+
+         Times are formatted as UTC strings, and ``exit`` is the string
+         'no' while STATEDB has no exit code for the job.
+
+         '''
+         path = statedb.get_job_path(job_id)
+         exit = statedb.get_job_exit(job_id)
+         output = statedb.get_job_output(job_id)
+         started, ended = statedb.get_job_started_and_ended(job_id)
+         disk_usage = statedb.get_job_disk_usage(job_id)
+         now = statedb.get_current_time()
+
+         return {
+             'job_id': job_id,
+             'host': statedb.get_job_minion_host(job_id),
+             'pid': statedb.get_job_minion_pid(job_id),
+             # Reuse the path fetched above instead of querying again.
+             'path': path,
+             'exit': 'no' if exit is None else exit,
+             'disk_usage': disk_usage,
+             'disk_usage_nice': self.format_bytesize(disk_usage),
+             'output': output,
+             'job_started': self.format_time(started),
+             'job_ended': self.format_time(ended),
+             'timestamp': self.format_time(now),
+             }
+
+     def format_time(self, timestamp):
+         '''Format seconds since the epoch as "YYYY-MM-DD HH:MM:SS UTC".'''
+         return time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(timestamp))
+
+     def format_bytesize(self, num_bytes):
+         '''Format a byte count in MiB with one decimal; None is unknown.'''
+         if num_bytes is None:
+             return 'unknown'
+         mebibyte = 2**20
+         return '%.1f MiB' % (float(num_bytes) / float(mebibyte))
+
+
+ class ShowJob(lorrycontroller.LorryControllerRoute):
+
+     '''Route that returns the information about one job as a dict.'''
+
+     http_method = 'GET'
+     path = '/1.0/job/<job_id:int>'
+
+     def run(self, **kwargs):
+         '''Return JobShower's description of the job in the URL.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         wanted_job = int(kwargs['job_id'])
+
+         shower = JobShower()
+         return shower.get_job_as_json(self.open_statedb(), wanted_job)
+
+
+ class ShowJobHTML(lorrycontroller.LorryControllerRoute):
+
+     '''Route that renders the information about one job as HTML.'''
+
+     http_method = 'GET'
+     path = '/1.0/job-html/<job_id:int>'
+
+     def run(self, **kwargs):
+         '''Return the ``job`` template filled in for the job in the URL.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         wanted_job = int(kwargs['job_id'])
+
+         job_info = JobShower().get_job_as_json(
+             self.open_statedb(), wanted_job)
+         return bottle.template(self._templates['job'], **job_info)
diff --git a/lorrycontroller/showlorry.py b/lorrycontroller/showlorry.py
new file mode 100644
index 0000000..fc336a5
--- /dev/null
+++ b/lorrycontroller/showlorry.py
@@ -0,0 +1,86 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import json
+import logging
+import time
+import urlparse
+
+import bottle
+
+import lorrycontroller
+
+
+ class ShowLorry(lorrycontroller.LorryControllerRoute):
+
+     '''Route that returns STATEDB's information about one lorry.'''
+
+     http_method = 'GET'
+     path = '/1.0/lorry/<path:path>'
+
+     def run(self, **kwargs):
+         '''Return the lorry info, or abort with 404 for an unknown path.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         statedb = self.open_statedb()
+         try:
+             lorry_info = statedb.get_lorry_info(kwargs['path'])
+         except lorrycontroller.LorryNotFoundError as e:
+             bottle.abort(404, str(e))
+         else:
+             return lorry_info
+
+
+ class ShowLorryHTML(lorrycontroller.LorryControllerRoute):
+
+     '''Route that renders STATEDB's information about one lorry as HTML.'''
+
+     http_method = 'GET'
+     path = '/1.0/lorry-html/<path:path>'
+
+     def run(self, **kwargs):
+         '''Return the filled-in ``lorry`` template, or abort with 404.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         statedb = self.open_statedb()
+         try:
+             lorry_info = statedb.get_lorry_info(kwargs['path'])
+         except lorrycontroller.LorryNotFoundError as e:
+             bottle.abort(404, str(e))
+
+         renderer = lorrycontroller.StatusRenderer()
+         shower = lorrycontroller.JobShower()
+
+         # The stored text is a JSON object; the lorry spec is taken
+         # from its first value.
+         lorry_obj = json.loads(lorry_info['text']).values()[0]
+         lorry_info['url'] = lorry_obj['url']
+
+         lorry_info['interval_nice'] = renderer.format_secs_nicely(
+             lorry_info['interval'])
+
+         lorry_info['last_run_nice'] = time.strftime(
+             '%Y-%m-%d %H:%M:%S UTC',
+             time.gmtime(lorry_info['last_run']))
+
+         lorry_info['disk_usage_nice'] = shower.format_bytesize(
+             lorry_info['disk_usage'])
+
+         now = statedb.get_current_time()
+
+         due = lorry_info['last_run'] + lorry_info['interval']
+         lorry_info['due_nice'] = renderer.format_due_nicely(due, now)
+
+         timestamp = time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now))
+
+         parts = urlparse.urlparse(bottle.request.url)
+         # The netloc has no ":port" part when the request was made on
+         # the scheme's default port, so do not assume there is one.
+         host = parts.netloc.partition(':')[0]
+         http_server_root = urlparse.urlunparse(
+             (parts.scheme, host, '', '', '', ''))
+
+         return bottle.template(
+             self._templates['lorry'],
+             http_server_root=http_server_root,
+             lorry=lorry_info,
+             timestamp=timestamp)
diff --git a/lorrycontroller/startstopqueue.py b/lorrycontroller/startstopqueue.py
new file mode 100644
index 0000000..58da2d0
--- /dev/null
+++ b/lorrycontroller/startstopqueue.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+ class StartQueue(lorrycontroller.LorryControllerRoute):
+
+     '''Route that marks the queue as running in STATEDB.'''
+
+     http_method = 'POST'
+     path = '/1.0/start-queue'
+
+     def run(self, **kwargs):
+         '''Set the running flag, then redirect or return a message.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         with self.open_statedb() as statedb:
+             statedb.set_running_queue(1)
+
+         forms = bottle.request.forms
+         if 'redirect' in forms:
+             bottle.redirect(forms.redirect)
+
+         return 'Queue set to run'
+
+
+ class StopQueue(lorrycontroller.LorryControllerRoute):
+
+     '''Route that marks the queue as not running in STATEDB.'''
+
+     http_method = 'POST'
+     path = '/1.0/stop-queue'
+
+     def run(self, **kwargs):
+         '''Clear the running flag, then redirect or return a message.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         with self.open_statedb() as statedb:
+             statedb.set_running_queue(0)
+
+         forms = bottle.request.forms
+         if 'redirect' in forms:
+             bottle.redirect(forms.redirect)
+
+         return 'Queue set to not run'
diff --git a/lorrycontroller/statedb.py b/lorrycontroller/statedb.py
new file mode 100644
index 0000000..b7950e1
--- /dev/null
+++ b/lorrycontroller/statedb.py
@@ -0,0 +1,577 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import sqlite3
+import time
+
+import lorrycontroller
+
+
+ class LorryNotFoundError(Exception):
+
+     '''STATEDB has no lorry with the requested path.'''
+
+     def __init__(self, path):
+         message = 'Lorry with path %r not found in STATEDB' % path
+         Exception.__init__(self, message)
+
+
+ class WrongNumberLorriesRunningJob(Exception):
+
+     '''The number of lorries running a given job is not exactly one.'''
+
+     def __init__(self, job_id, row_count):
+         template = 'STATEDB has %d Lorry specs running job %r, should be 1'
+         Exception.__init__(self, template % (row_count, job_id))
+
+
+ class TroveNotFoundError(Exception):
+
+     '''STATEDB has no Trove with the requested host name.'''
+
+     def __init__(self, trovehost):
+         message = 'Trove %s not known in STATEDB' % trovehost
+         Exception.__init__(self, message)
+
+
+class StateDB(object):
+
+ '''A wrapper around raw Sqlite for STATEDB.'''
+
+     def __init__(self, filename):
+         '''Remember the database file name; no connection is opened yet.'''
+         logging.debug('Creating StateDB instance for %r', filename)
+         self._transaction_started = None
+         self._conn = None
+         self._filename = filename
+
+     def _open(self):
+         '''Connect to the database unless a connection already exists.
+
+         The tables are created if the database file did not exist
+         before connecting. The description of the lorries table's
+         columns is (re)set on every call.
+
+         '''
+         self.lorries_fields = [
+             ('path', 'TEXT PRIMARY KEY'),
+             ('text', 'TEXT'),
+             ('from_trovehost', 'TEXT'),
+             ('from_path', 'TEXT'),
+             ('running_job', 'INT'),
+             ('kill_job', 'INT'),
+             ('last_run', 'INT'),
+             ('interval', 'INT'),
+             ('lorry_timeout', 'INT'),
+             ('disk_usage', 'INT'),
+         ]
+         self.lorries_booleans = ['kill_job']
+
+         if self._conn is not None:
+             return
+
+         existed = os.path.exists(self._filename)
+         logging.debug(
+             'Connecting to %r (existed=%r)', self._filename, existed)
+         connect_options = {
+             'timeout': 100000,
+             'isolation_level': "IMMEDIATE",
+         }
+         self._conn = sqlite3.connect(self._filename, **connect_options)
+         logging.debug('New connection is %r', self._conn)
+         if not existed:
+             self._initialise_tables()
+
+     def _initialise_tables(self):
+         '''Create all STATEDB tables, seed the initial rows, and commit.'''
+         logging.debug('Initialising tables in database')
+
+         # Column definitions for the lorries table.
+         lorries_columns = ', '.join(
+             '%s %s' % (column, sql_type)
+             for column, sql_type in self.lorries_fields)
+
+         statements = [
+             # Table for holding the "are we scheduling jobs" value.
+             'CREATE TABLE running_queue (running INT)',
+             'INSERT INTO running_queue VALUES (1)',
+
+             # Table for known remote Troves.
+             'CREATE TABLE troves ('
+             'trovehost TEXT PRIMARY KEY, '
+             'protocol TEXT, '
+             'username TEXT, '
+             'password TEXT, '
+             'lorry_interval INT, '
+             'lorry_timeout INT, '
+             'ls_interval INT, '
+             'ls_last_run INT, '
+             'prefixmap TEXT, '
+             'ignore TEXT '
+             ')',
+
+             # Table for all the known lorries (the "run queue").
+             'CREATE TABLE lorries (%s)' % lorries_columns,
+
+             # Table for the next available job id.
+             'CREATE TABLE next_job_id (job_id INT)',
+             'INSERT INTO next_job_id VALUES (1)',
+
+             # Table of all jobs (running or not), and their info.
+             'CREATE TABLE jobs ('
+             'job_id INT PRIMARY KEY, '
+             'host TEXT, '
+             'pid INT, '
+             'started INT, '
+             'ended INT, '
+             'path TEXT, '
+             'exit TEXT, '
+             'disk_usage INT, '
+             'output TEXT)',
+
+             # Table for holding max number of jobs running at once. If
+             # no rows, there is no limit. Otherwise, there is exactly
+             # one row.
+             'CREATE TABLE max_jobs (max_jobs INT)',
+
+             # A table to give the current pretended time, if one is
+             # set. This table is either empty, in which case
+             # time.time() is used, or has one row, which is used for
+             # the current time.
+             'CREATE TABLE time (now INT)',
+
+             # Stupid table we can always write to to trigger the start
+             # of a transaction.
+             'CREATE TABLE stupid (value INT)',
+         ]
+
+         cursor = self._conn.cursor()
+         for statement in statements:
+             cursor.execute(statement)
+
+         # Done.
+         self._conn.commit()
+         logging.debug('Finished initialising tables in STATEDB')
+
+     @property
+     def in_transaction(self):
+         '''True when a transaction start time has been recorded.'''
+         started = self._transaction_started
+         return started is not None
+
+     def __enter__(self):
+         '''Start a transaction and return this StateDB.'''
+         logging.debug('Entering context manager (%r)', self)
+         assert not self.in_transaction
+         self._transaction_started = time.time()
+         self._open()
+         # Writing to the dummy table triggers the start of the
+         # transaction.
+         self._conn.cursor().execute('INSERT INTO stupid VALUES (1)')
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         '''Finish the transaction and close the connection.
+
+         Commit if the ``with`` block ended normally, roll back if it
+         raised. The exception, if any, is never suppressed.
+
+         '''
+         logging.debug('Exiting context manager (%r)', self)
+         assert self.in_transaction
+         failed = exc_type is not None
+         if failed:
+             logging.error(
+                 'Rolling back transaction in __exit__ (%r)',
+                 self._conn,
+                 exc_info=(exc_type, exc_val, exc_tb))
+             self._conn.rollback()
+         else:
+             logging.debug(
+                 'Committing transaction in __exit__ (%r)', self._conn)
+             # Empty the dummy table that __enter__ wrote to.
+             cursor = self._conn.cursor()
+             cursor.execute('DELETE FROM stupid')
+             self._conn.commit()
+         self._conn.close()
+         self._conn = None
+         duration = time.time() - self._transaction_started
+         logging.debug('Transaction duration: %r', duration)
+         self._transaction_started = None
+         return False
+
+     def get_cursor(self):
+         '''Return a fresh cursor, connecting to the database if needed.'''
+         self._open()
+         cursor = self._conn.cursor()
+         return cursor
+
+     def get_running_queue(self):
+         '''Return the running flag as a bool (None if the table is empty).'''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT running FROM running_queue')
+         first_row = cursor.fetchone()
+         if first_row is None:
+             return None
+         return bool(first_row[0])
+
+     def set_running_queue(self, new_status):
+         '''Record whether the queue is running; must be in a transaction.
+
+         Any true ``new_status`` is stored as 1, anything else as 0.
+
+         '''
+         logging.debug('StateDB.set_running_queue(%r) called', new_status)
+         assert self.in_transaction
+         if new_status:
+             new_value = 1
+         else:
+             new_value = 0
+         # SQL parameters must be given as a sequence of values. A bare
+         # string only happened to work because it was one character.
+         self.get_cursor().execute(
+             'UPDATE running_queue SET running = ?', (new_value,))
+
+     def get_trove_info(self, trovehost):
+         '''Return a dict with everything STATEDB stores about a Trove.
+
+         Raise lorrycontroller.TroveNotFoundError if ``trovehost`` is
+         not in the troves table.
+
+         '''
+         # Keys of the result, in the order the columns are selected.
+         columns = (
+             'protocol', 'username', 'password', 'lorry_interval',
+             'lorry_timeout', 'ls_interval', 'ls_last_run',
+             'prefixmap', 'ignore')
+
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT protocol, username, password, lorry_interval, '
+             'lorry_timeout, ls_interval, ls_last_run, '
+             'prefixmap, ignore '
+             'FROM troves WHERE trovehost IS ?',
+             (trovehost,))
+         row = cursor.fetchone()
+         if row is None:
+             raise lorrycontroller.TroveNotFoundError(trovehost)
+
+         info = dict(zip(columns, row))
+         info['trovehost'] = trovehost
+         return info
+
+     def add_trove(self, trovehost=None, protocol=None, username=None,
+                   password=None, lorry_interval=None,
+                   lorry_timeout=None, ls_interval=None,
+                   prefixmap=None, ignore=None):
+         '''Insert a trove row, or update the existing row for trovehost.
+
+         Every argument except username and password must be given.
+         Must be called inside a transaction. A new row starts with
+         ls_last_run set to 0.
+         '''
+         logging.debug(
+             'StateDB.add_trove(%r,%r,%r,%r,%r,%r) called',
+             trovehost, lorry_interval, lorry_timeout, ls_interval,
+             prefixmap, ignore)
+
+         required = (
+             trovehost, protocol, lorry_interval, lorry_timeout,
+             ls_interval, prefixmap, ignore)
+         for value in required:
+             assert value is not None
+         assert self.in_transaction
+
+         try:
+             self.get_trove_info(trovehost)
+         except lorrycontroller.TroveNotFoundError:
+             already_known = False
+         else:
+             already_known = True
+
+         cursor = self.get_cursor()
+         if already_known:
+             # NOTE(review): username and password are not updated
+             # here -- confirm whether that is intentional.
+             cursor.execute(
+                 'UPDATE troves '
+                 'SET lorry_interval=?, lorry_timeout=?, ls_interval=?, '
+                 'prefixmap=?, ignore=?, protocol=? '
+                 'WHERE trovehost IS ?',
+                 (lorry_interval, lorry_timeout, ls_interval, prefixmap,
+                  ignore, protocol, trovehost))
+         else:
+             cursor.execute(
+                 'INSERT INTO troves '
+                 '(trovehost, protocol, username, password, '
+                 'lorry_interval, lorry_timeout, '
+                 'ls_interval, ls_last_run, '
+                 'prefixmap, ignore) '
+                 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',
+                 (trovehost, protocol, username, password,
+                  lorry_interval, lorry_timeout, ls_interval, 0,
+                  prefixmap, ignore))
+
+     def remove_trove(self, trovehost):
+         '''Delete the troves row for trovehost (inside a transaction).'''
+         logging.debug('StateDB.remove_trove(%r) called', trovehost)
+         assert self.in_transaction
+         params = (trovehost,)
+         self.get_cursor().execute(
+             'DELETE FROM troves WHERE trovehost=?', params)
+
+     def get_troves(self):
+         '''Return the trovehost value of every row in troves.'''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT trovehost FROM troves')
+         hosts = []
+         for row in cursor.fetchall():
+             hosts.append(row[0])
+         return hosts
+
+     def set_trove_ls_last_run(self, trovehost, ls_last_run):
+         '''Store ls_last_run for the given trove (inside a transaction).'''
+         logging.debug(
+             'StateDB.set_trove_ls_last_run(%r,%r) called',
+             trovehost, ls_last_run)
+         assert self.in_transaction
+         params = (ls_last_run, trovehost)
+         self.get_cursor().execute(
+             'UPDATE troves SET ls_last_run=? WHERE trovehost=?', params)
+
+     def make_lorry_info_from_row(self, row):
+         '''Turn a lorries table row into a dict keyed by field name.
+
+         Keys are the first element of each entry in
+         self.lorries_fields, matched to row by position. Fields
+         named in self.lorries_booleans are converted with bool().
+         '''
+         info = {}
+         for index, field_spec in enumerate(self.lorries_fields):
+             info[field_spec[0]] = row[index]
+         for name in self.lorries_booleans:
+             info[name] = bool(info[name])
+         return info
+
+     def get_lorry_info(self, path):
+         '''Return the info dict for the lorry stored under path.
+
+         Raises lorrycontroller.LorryNotFoundError if there is no
+         such row.
+         '''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT * FROM lorries WHERE path IS ?', (path,))
+         row = cursor.fetchone()
+         if row is not None:
+             return self.make_lorry_info_from_row(row)
+         raise lorrycontroller.LorryNotFoundError(path)
+
+     def get_all_lorries_info(self):
+         '''Return info dicts for all lorries, ordered by last_run + interval.'''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT * FROM lorries ORDER BY (last_run + interval)')
+         infos = []
+         for row in cursor.fetchall():
+             infos.append(self.make_lorry_info_from_row(row))
+         return infos
+
+     def get_lorries_paths(self):
+         '''Return all lorry paths, ordered by last_run + interval.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT path FROM lorries ORDER BY (last_run + interval)')
+         paths = []
+         for row in cursor:
+             paths.append(row[0])
+         return paths
+
+     def get_lorries_for_trove(self, trovehost):
+         '''Return the paths of lorries whose from_trovehost is trovehost.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT path FROM lorries WHERE from_trovehost IS ?', (trovehost,))
+         paths = []
+         for row in cursor.fetchall():
+             paths.append(row[0])
+         return paths
+
+     def add_to_lorries(self, path=None, text=None, from_trovehost=None,
+                        from_path=None, interval=None, timeout=None):
+         '''Insert a lorry row, or update the existing row for path.
+
+         All arguments are required. Must be called inside a
+         transaction. A new row starts with last_run 0, no running
+         job and kill_job 0; an update leaves those columns alone.
+         '''
+         # The original format string left out from_path and the
+         # closing parenthesis, so the logged call was incomplete.
+         logging.debug(
+             'StateDB.add_to_lorries('
+             'path=%r, text=%r, from_trovehost=%r, from_path=%r, '
+             'interval=%s, timeout=%r) called',
+             path,
+             text,
+             from_trovehost,
+             from_path,
+             interval,
+             timeout)
+
+         assert path is not None
+         assert text is not None
+         assert from_trovehost is not None
+         assert from_path is not None
+         assert interval is not None
+         assert timeout is not None
+         assert self.in_transaction
+
+         try:
+             self.get_lorry_info(path)
+         except lorrycontroller.LorryNotFoundError:
+             c = self.get_cursor()
+             c.execute(
+                 'INSERT INTO lorries '
+                 '(path, text, from_trovehost, from_path, last_run, interval, '
+                 'lorry_timeout, running_job, kill_job) '
+                 'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
+                 (path, text, from_trovehost, from_path, 0,
+                  interval, timeout, None, 0))
+         else:
+             c = self.get_cursor()
+             c.execute(
+                 'UPDATE lorries '
+                 'SET text=?, from_trovehost=?, from_path=?, interval=?, '
+                 'lorry_timeout=? '
+                 'WHERE path IS ?',
+                 (text, from_trovehost, from_path, interval, timeout, path))
+
+     def remove_lorry(self, path):
+         '''Delete the lorries row for path (inside a transaction).'''
+         logging.debug('StateDB.remove_lorry(%r) called', path)
+         assert self.in_transaction
+         params = (path,)
+         self.get_cursor().execute(
+             'DELETE FROM lorries WHERE path IS ?', params)
+
+     def remove_lorries_for_trovehost(self, trovehost):
+         '''Delete every lorry whose from_trovehost is trovehost.
+
+         Must be called inside a transaction.
+         '''
+         # The log message used to misspell the method name as
+         # "remove_lorries_for_trovest".
+         logging.debug(
+             'StateDB.remove_lorries_for_trovehost(%r) called', trovehost)
+         assert self.in_transaction
+         c = self.get_cursor()
+         c.execute('DELETE FROM lorries WHERE from_trovehost IS ?', (trovehost,))
+
+     def set_running_job(self, path, job_id):
+         '''Store job_id as the running job of the lorry at path.
+
+         Must be called inside a transaction.
+         '''
+         logging.debug(
+             'StateDB.set_running_job(%r, %r) called', path, job_id)
+         assert self.in_transaction
+         params = (job_id, path)
+         self.get_cursor().execute(
+             'UPDATE lorries SET running_job=? WHERE path=?', params)
+
+     def find_lorry_running_job(self, job_id):
+         '''Return the path of the single lorry whose running_job is job_id.
+
+         Raises lorrycontroller.WrongNumberLorriesRunningJob unless
+         exactly one lorry matches.
+         '''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT path FROM lorries WHERE running_job IS ?',
+             (job_id,))
+         matches = cursor.fetchall()
+         count = len(matches)
+         if count == 1:
+             return matches[0][0]
+         raise lorrycontroller.WrongNumberLorriesRunningJob(job_id, count)
+
+     def get_running_jobs(self):
+         '''Return every non-NULL running_job value from lorries.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT running_job FROM lorries WHERE running_job IS NOT NULL')
+         job_ids = []
+         for row in cursor.fetchall():
+             job_ids.append(row[0])
+         return job_ids
+
+     def set_kill_job(self, path, value):
+         '''Set the kill_job flag of the lorry at path to 1 or 0.
+
+         value is judged by truthiness. Must be called inside a
+         transaction.
+         '''
+         logging.debug('StateDB.set_kill_job(%r, %r) called', path, value)
+         assert self.in_transaction
+         flag = 1 if value else 0
+         self.get_cursor().execute(
+             'UPDATE lorries SET kill_job=? WHERE path=?',
+             (flag, path))
+
+     def set_lorry_last_run(self, path, last_run):
+         '''Store last_run for the lorry at path (inside a transaction).'''
+         logging.debug(
+             'StateDB.set_lorry_last_run(%r, %r) called', path, last_run)
+         assert self.in_transaction
+         params = (last_run, path)
+         self.get_cursor().execute(
+             'UPDATE lorries SET last_run=? WHERE path=?', params)
+
+     def set_lorry_disk_usage(self, path, disk_usage):
+         '''Store disk_usage for the lorry at path (inside a transaction).'''
+         logging.debug(
+             'StateDB.set_lorry_disk_usage(%r, %r) called', path, disk_usage)
+         assert self.in_transaction
+         params = (disk_usage, path)
+         self.get_cursor().execute(
+             'UPDATE lorries SET disk_usage=? WHERE path=?', params)
+
+     def get_next_job_id(self):
+         '''Return the stored next job id and bump the stored value by one.
+
+         Must be called inside a transaction.
+         '''
+         logging.debug('StateDB.get_next_job_id called')
+         assert self.in_transaction
+         cursor = self.get_cursor()
+         cursor.execute('SELECT job_id FROM next_job_id')
+         current = cursor.fetchone()[0]
+         following = current + 1
+         cursor.execute('UPDATE next_job_id SET job_id=?', (following,))
+         return current
+
+     def get_job_ids(self):
+         '''Return the job_id of every row in jobs.'''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT job_id FROM jobs')
+         job_ids = []
+         for row in cursor.fetchall():
+             job_ids.append(row[0])
+         return job_ids
+
+     def add_new_job(self, job_id, host, pid, path, started):
+         '''Insert a new row into jobs (inside a transaction).'''
+         logging.debug(
+             'StateDB.add_new_job(%r, %r, %r, %r, %r) called',
+             job_id, host, pid, path, started)
+         assert self.in_transaction
+         values = (job_id, host, pid, path, started)
+         self.get_cursor().execute(
+             'INSERT INTO jobs (job_id, host, pid, path, started) '
+             'VALUES (?, ?, ?, ?, ?)',
+             values)
+
+     def get_job_minion_host(self, job_id):
+         '''Return the host column of the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT host FROM jobs WHERE job_id IS ?',
+             (job_id,))
+         return cursor.fetchone()[0]
+
+     def get_job_minion_pid(self, job_id):
+         '''Return the pid column of the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT pid FROM jobs WHERE job_id IS ?',
+             (job_id,))
+         return cursor.fetchone()[0]
+
+     def get_job_path(self, job_id):
+         '''Return the path column of the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT path FROM jobs WHERE job_id IS ?',
+             (job_id,))
+         return cursor.fetchone()[0]
+
+     def get_job_started_and_ended(self, job_id):
+         '''Return a (started, ended) tuple for the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT started, ended FROM jobs WHERE job_id IS ?',
+             (job_id,))
+         found = cursor.fetchone()
+         started = found[0]
+         ended = found[1]
+         return started, ended
+
+     def get_job_exit(self, job_id):
+         '''Return the exit column of the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT exit FROM jobs WHERE job_id IS ?',
+             (job_id,))
+         return cursor.fetchone()[0]
+
+     def set_job_exit(self, job_id, exit, ended, disk_usage):
+         '''Store exit, ended and disk_usage for a job (inside a transaction).'''
+         logging.debug(
+             'StateDB.set_job_exit(%r, %r, %r, %r) called',
+             job_id, exit, ended, disk_usage)
+         assert self.in_transaction
+         params = (exit, ended, disk_usage, job_id)
+         self.get_cursor().execute(
+             'UPDATE jobs SET exit=?, ended=?, disk_usage=? '
+             'WHERE job_id IS ?',
+             params)
+
+     def get_job_disk_usage(self, job_id):
+         '''Return the disk_usage column of the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT disk_usage FROM jobs WHERE job_id IS ?', (job_id,))
+         return cursor.fetchone()[0]
+
+     def get_job_output(self, job_id):
+         '''Return the output column of the job with the given id.'''
+         cursor = self.get_cursor()
+         cursor.execute(
+             'SELECT output FROM jobs WHERE job_id IS ?',
+             (job_id,))
+         return cursor.fetchone()[0]
+
+     def append_to_job_output(self, job_id, more_output):
+         '''Append more_output to the stored output of a job.
+
+         A missing or empty stored output is treated as ''. Must be
+         called inside a transaction.
+         '''
+         logging.debug('StateDB.append_to_job_output(%r,..) called', job_id)
+         assert self.in_transaction
+
+         existing = self.get_job_output(job_id)
+         if not existing:
+             existing = ''
+         combined = existing + more_output
+
+         self.get_cursor().execute(
+             'UPDATE jobs SET output=? WHERE job_id=?',
+             (combined, job_id))
+
+     def remove_job(self, job_id):
+         '''Delete the jobs row with the given id (inside a transaction).'''
+         # The log message used to be a copy of the one in
+         # append_to_job_output, which made logs misleading.
+         logging.debug('StateDB.remove_job(%r) called', job_id)
+         assert self.in_transaction
+         c = self.get_cursor()
+         c.execute('DELETE FROM jobs WHERE job_id = ?', (job_id,))
+
+     def set_pretend_time(self, now):
+         '''Replace the contents of the time table with int(now).
+
+         Must be called inside a transaction.
+         '''
+         logging.debug('StateDB.set_pretend_time(%r) called', now)
+         assert self.in_transaction
+         cursor = self.get_cursor()
+         cursor.execute('DELETE FROM time')
+         pretend = int(now)
+         cursor.execute('INSERT INTO time (now) VALUES (?)', (pretend,))
+
+     def get_current_time(self):
+         '''Return the time stored in the time table, else time.time().'''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT now FROM time')
+         stored = cursor.fetchone()
+         if not stored:
+             return time.time()
+         return stored[0]
+
+     def get_max_jobs(self):
+         '''Return the stored max_jobs value, or None if none is stored.'''
+         cursor = self.get_cursor()
+         cursor.execute('SELECT max_jobs FROM max_jobs')
+         stored = cursor.fetchone()
+         if not stored:
+             logging.info('returning max_jobs as None')
+             return None
+         logging.info('returning max_jobs as %r', stored[0])
+         return stored[0]
+
+     def set_max_jobs(self, max_jobs):
+         '''Replace the stored max_jobs value.
+
+         The table is emptied first; passing None leaves it empty.
+         Must be called inside a transaction.
+         '''
+         logging.debug('StateDB.set_max_jobs(%r) called', max_jobs)
+         assert self.in_transaction
+         cursor = self.get_cursor()
+         cursor.execute('DELETE FROM max_jobs')
+         if max_jobs is None:
+             return
+         cursor.execute(
+             'INSERT INTO max_jobs (max_jobs) VALUES (?)', (max_jobs,))
diff --git a/lorrycontroller/static.py b/lorrycontroller/static.py
new file mode 100644
index 0000000..a8ba938
--- /dev/null
+++ b/lorrycontroller/static.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+ class StaticFile(lorrycontroller.LorryControllerRoute):
+
+     '''Route that serves a file from the 'static-files' directory.'''
+
+     # Note that the path below must match what lighttpd (running on a
+     # different port than us) would accept.
+
+     http_method = 'GET'
+     path = '/lc-static/<filename>'
+
+     def run(self, **kwargs):
+         '''Return bottle's static-file response for kwargs['filename'].'''
+         logging.info('%s %s called', self.http_method, self.path)
+         filename = kwargs['filename']
+         root = self.app_settings['static-files']
+         return bottle.static_file(filename, root)
diff --git a/lorrycontroller/status.py b/lorrycontroller/status.py
new file mode 100644
index 0000000..bd32e6b
--- /dev/null
+++ b/lorrycontroller/status.py
@@ -0,0 +1,169 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import time
+
+import bottle
+
+import lorrycontroller
+
+
+ class StatusRenderer(object):
+
+     '''Helper class for rendering service status as JSON/HTML'''
+
+     def get_status_as_dict(self, statedb, work_directory):
+         '''Collect the current status into a plain dict.
+
+         statedb supplies the time, queue, trove and max_jobs data;
+         work_directory is the path whose filesystem is checked for
+         free space.
+         '''
+         quotes = [
+             "Never get drunk unless you're willing to pay for it - "
+                 "the next day.",
+             "I'm giving her all she's got, Captain!",
+         ]
+         import random
+         now = statedb.get_current_time()
+         status = {
+             'quote': '%s' % random.choice(quotes),
+             'running_queue': statedb.get_running_queue(),
+             'timestamp':
+                 time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(now)),
+             'run_queue': self.get_run_queue(statedb),
+             'troves': self.get_troves(statedb),
+             'warning_msg': '',
+             'max_jobs': self.get_max_jobs(statedb),
+         }
+         status.update(self.get_free_disk_space(work_directory))
+         return status
+
+     def render_status_as_html(self, template, status):
+         '''Render status through the given bottle template.'''
+         return bottle.template(template, **status)
+
+     def write_status_as_html(self, template, status, filename):
+         '''Render status as HTML and write it to filename.
+
+         A failure to write is not raised; it is recorded in
+         status['warning_msg'] instead.
+         '''
+         html = self.render_status_as_html(template, status)
+         try:
+             with open(filename, 'w') as f:
+                 f.write(html)
+         except (OSError, IOError) as e:
+             status['warning_msg'] = (
+                 'ERROR WRITING STATUS HTML TO DISK: %s' % str(e))
+
+     def get_free_disk_space(self, dirname):
+         '''Return free space on dirname's filesystem in bytes, MiB and GiB.'''
+         result = os.statvfs(dirname)
+         free_bytes = result.f_bavail * result.f_bsize
+         # Floor division keeps the MiB/GiB figures whole numbers
+         # whichever division semantics are in effect.
+         return {
+             'disk_free': free_bytes,
+             'disk_free_mib': free_bytes // 1024**2,
+             'disk_free_gib': free_bytes // 1024**3,
+         }
+
+     def get_run_queue(self, statedb):
+         '''Return all lorry infos with interval_nice and due_nice added.'''
+         lorries = statedb.get_all_lorries_info()
+         now = statedb.get_current_time()
+         for lorry in lorries:
+             due = lorry['last_run'] + lorry['interval']
+             lorry['interval_nice'] = self.format_secs_nicely(lorry['interval'])
+             lorry['due_nice'] = self.format_due_nicely(due, now)
+         return lorries
+
+     def format_due_nicely(self, due, now):
+         '''Return 'now' if due has passed, else 'in <duration>'.'''
+         now = int(now)
+         if due <= now:
+             return 'now'
+         else:
+             nice = self.format_secs_nicely(due - now)
+             return 'in %s' % nice
+
+     def format_secs_nicely(self, secs):
+         '''Format a duration in seconds using its two largest units.
+
+         Returns 'now' for zero or negative durations. Hours are
+         shown with minutes, minutes with seconds, and anything
+         under a minute as seconds only.
+         '''
+         if secs <= 0:
+             return 'now'
+
+         result = []
+
+         # Floor division: with "/" a float (or true-division) input
+         # such as 1800.0 gave hours == 0.5 and was rendered as
+         # '0 h 30 min'.
+         hours = secs // 3600
+         secs %= 3600
+         mins = secs // 60
+         secs %= 60
+
+         if hours > 0:
+             result.append('%d h' % hours)
+             if mins > 0:
+                 result.append('%d min' % mins)
+         elif mins > 0:
+             result.append('%d min' % mins)
+             if secs > 0:
+                 result.append('%d s' % secs)
+         else:
+             result.append('%d s' % secs)
+
+         return ' '.join(result)
+
+     def get_troves(self, statedb):
+         '''Return trove infos with ls_interval_nice and ls_due_nice added.'''
+         troves = []
+         for trovehost in statedb.get_troves():
+             trove_info = statedb.get_trove_info(trovehost)
+
+             trove_info['ls_interval_nice'] = self.format_secs_nicely(
+                 trove_info['ls_interval'])
+
+             ls_due = trove_info['ls_last_run'] + trove_info['ls_interval']
+             now = int(statedb.get_current_time())
+             trove_info['ls_due_nice'] = self.format_due_nicely(ls_due, now)
+
+             troves.append(trove_info)
+         return troves
+
+     def get_max_jobs(self, statedb):
+         '''Return the max_jobs setting, or 'unlimited' if it is None.'''
+         max_jobs = statedb.get_max_jobs()
+         if max_jobs is None:
+             return 'unlimited'
+         return max_jobs
+
+
+ class Status(lorrycontroller.LorryControllerRoute):
+
+     '''Route returning the status dict; also rewrites the status HTML file.'''
+
+     http_method = 'GET'
+     path = '/1.0/status'
+
+     def run(self, **kwargs):
+         '''Build the status, write it out as HTML, and return the dict.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         renderer = StatusRenderer()
+         statedb = self.open_statedb()
+         # The 'statedb' setting is passed as the path to measure
+         # free disk space on.
+         space_path = self.app_settings['statedb']
+         status = renderer.get_status_as_dict(statedb, space_path)
+         template = self._templates['status']
+         html_filename = self.app_settings['status-html']
+         renderer.write_status_as_html(template, status, html_filename)
+         return status
+
+
+ class StatusHTML(lorrycontroller.LorryControllerRoute):
+
+     '''Route returning the status as HTML; also rewrites the HTML file.'''
+
+     http_method = 'GET'
+     path = '/1.0/status-html'
+
+     def run(self, **kwargs):
+         '''Build the status, write it out as HTML, and return the HTML.'''
+         logging.info('%s %s called', self.http_method, self.path)
+         renderer = StatusRenderer()
+         statedb = self.open_statedb()
+         # The 'statedb' setting is passed as the path to measure
+         # free disk space on.
+         space_path = self.app_settings['statedb']
+         status = renderer.get_status_as_dict(statedb, space_path)
+         renderer.write_status_as_html(
+             self._templates['status'],
+             status,
+             self.app_settings['status-html'])
+         # Rendered a second time, after the write above had the
+         # chance to set status['warning_msg'].
+         template = self._templates['status']
+         return renderer.render_status_as_html(template, status)
diff --git a/lorrycontroller/stopjob.py b/lorrycontroller/stopjob.py
new file mode 100644
index 0000000..947f733
--- /dev/null
+++ b/lorrycontroller/stopjob.py
@@ -0,0 +1,41 @@
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+
+import bottle
+
+import lorrycontroller
+
+
+ class StopJob(lorrycontroller.LorryControllerRoute):
+
+     '''Route that sets the kill flag on the lorry running a given job.'''
+
+     http_method = 'POST'
+     path = '/1.0/stop-job'
+
+     def run(self, **kwargs):
+         '''Flag the job named by the job_id form field for killing.
+
+         Aborts with HTTP 409 unless exactly one lorry is running
+         that job; otherwise returns the lorry's info dict.
+         '''
+         logging.info('%s %s called', self.http_method, self.path)
+         statedb = self.open_statedb()
+         with statedb:
+             job_id = bottle.request.forms.job_id
+             try:
+                 lorry_path = statedb.find_lorry_running_job(job_id)
+             except lorrycontroller.WrongNumberLorriesRunningJob:
+                 logging.warning(
+                     "Tried to kill job %s which isn't running" % job_id)
+                 bottle.abort(409, 'Job is not currently running')
+             statedb.set_kill_job(lorry_path, True)
+             lorry_info = statedb.get_lorry_info(lorry_path)
+             return lorry_info
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e214c33
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,21 @@
+#!/usr/bin/python
+#
+# Copyright (C) 2012-2014 Codethink Limited
+
+
+ from distutils.core import setup
+ import glob
+
+
+ # Templates and static files are installed under share/lorry-controller/.
+ _data_files = [
+     ('share/lorry-controller/templates', glob.glob('templates/*')),
+     ('share/lorry-controller/static', glob.glob('static/*')),
+ ]
+
+ setup(
+     name='lorry-controller',
+     description='Run Lorry on remote repositories in a controller manner',
+     author='Codethink Ltd',
+     author_email='baserock-dev@baserock.org',
+     url='http://www.baserock.com/',
+     scripts=['lorry-controller-webapp', 'lorry-controller-minion'],
+     packages=['lorrycontroller'],
+     data_files=_data_files,
+ )
diff --git a/static/style.css b/static/style.css
new file mode 100644
index 0000000..8a6937d
--- /dev/null
+++ b/static/style.css
@@ -0,0 +1,18 @@
+table {
+ border: 1px solid black;
+}
+
+th, td {
+ padding-right: 2em;
+}
+
+th {
+ font-weight: bold;
+ text-align: left;
+}
+
+td {
+ font-family: monospace;
+ border-top: 1px solid black;
+ text-align: left;
+}
diff --git a/templates/job.tpl b/templates/job.tpl
new file mode 100644
index 0000000..33fe909
--- /dev/null
+++ b/templates/job.tpl
@@ -0,0 +1,20 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry Controller: Job {{job_id}}</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+<h1>Status of job {{job_id}}</h1>
+<p>Path of git repo: <code>{{path}}</code></p>
+<p>Started: {{job_started}}</p>
+<p>Ended: {{job_ended}}</p>
+<p>MINION: <code>{{host}}:{{pid}}</code></p>
+<p>Exit code: <code>{{exit}}</code></p>
+<p>Lorry disk usage (after job's finished): {{disk_usage_nice}}</p>
+<p>Output:</p>
+<pre>{{output}}</pre>
+<hr />
+<p>Updated: {{timestamp}}</p>
+ </body>
+</html>
diff --git a/templates/list-jobs.tpl b/templates/list-jobs.tpl
new file mode 100644
index 0000000..1d530aa
--- /dev/null
+++ b/templates/list-jobs.tpl
@@ -0,0 +1,32 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry Controller: ALL the jobs</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+ % import json
+
+ <h1>ALL the jobs</h1>
+
+<table>
+<tr>
+<th>Job ID</th>
+<th>path</th>
+<th>exit?</th>
+</tr>
+% for job in job_infos:
+<tr>
+<td><a href="/1.0/job-html/{{job['job_id']}}">{{job['job_id']}}</a></td>
+<td><a href="/1.0/lorry-html/{{job['path']}}">{{job['path']}}</a></td>
+<td>{{job['exit']}}</td>
+</tr>
+% end
+</table>
+
+ <hr />
+
+ <p>Updated: {{timestamp}}</p>
+
+ </body>
+</html>
diff --git a/templates/lorry.tpl b/templates/lorry.tpl
new file mode 100644
index 0000000..fad85cd
--- /dev/null
+++ b/templates/lorry.tpl
@@ -0,0 +1,44 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry {{lorry['path']}}</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+ % import json
+
+ <h1>Lorry {{lorry['path']}}</h1>
+
+<table>
+
+<tr> <th>Path</th>
+ <td><a href="{{http_server_root}}/cgi-bin/cgit.cgi/{{lorry['path']}}.git/">{{lorry['path']}}</a></td> </tr>
+<tr> <th>URL</th> <td>{{lorry['url']}}</td> </tr>
+<tr> <th>Interval</th> <td>{{lorry['interval_nice']}} ({{lorry['interval']}} s)</td> </tr>
+<tr> <th>Last run</th> <td>{{lorry['last_run_nice']}}</td> </tr>
+<tr> <th>Due</th> <td>{{lorry['due_nice']}}</td> </tr>
+<tr> <th>From Trove</th> <td>{{lorry['from_trovehost']}}</td> </tr>
+<tr> <th>Disk usage</th> <td>{{lorry['disk_usage_nice']}}</td> </tr>
+
+<tr> <th>Job?</th>
+% if lorry['running_job']:
+<td><a href="/1.0/job/{{lorry['running_job']}}">{{lorry['running_job']}}</a></td>
+% else:
+<td></td>
+% end
+</tr>
+
+</table>
+
+<h2>Full Lorry text</h2>
+
+<blockquote>
+<pre>{{lorry['text']}}</pre>
+</blockquote>
+
+ <hr />
+
+ <p>Updated: {{timestamp}}</p>
+
+ </body>
+</html>
diff --git a/templates/status.tpl b/templates/status.tpl
new file mode 100644
index 0000000..80f1ff7
--- /dev/null
+++ b/templates/status.tpl
@@ -0,0 +1,115 @@
+<!DOCTYPE HTML>
+<html>
+ <head>
+ <title>Lorry Controller status</title>
+ <link rel="stylesheet" href="/lc-static/style.css" type="text/css" />
+ </head>
+ <body>
+ % import json
+
+ <p>{{warning_msg}}</p>
+
+ <h1>Status of Lorry Controller</h1>
+
+% if running_queue:
+<form method="POST" action="/1.0/stop-queue">
+ <p>New jobs are allowed.
+ <input type="submit" name="submit" value="Don't allow new jobs" />
+ <input type="hidden" name="redirect" value="/1.0/status-html" />
+ </p>
+</form>
+% else:
+<form method="POST" action="/1.0/start-queue">
+ <p>New jobs are NOT allowed.
+ <input type="submit" name="submit" value="Allow new jobs" />
+ <input type="hidden" name="redirect" value="/1.0/status-html" />
+ </p>
+</form>
+% end
+
+<form method="POST" action="/1.0/read-configuration">
+ <p>
+ <input type="submit" name="submit" value="Re-read configuration" />
+ <input type="hidden" name="redirect" value="/1.0/status-html" />
+ </p>
+</form>
+
+<p>Maximum number of jobs: {{max_jobs}}.</p>
+
+ <p>Free disk space: {{disk_free_gib}} GiB.</p>
+
+<h2>Remote Troves</h2>
+
+<table>
+<tr>
+<th>Trove host</th>
+<th>Due for re-scan of remote repositories</th>
+</tr>
+% for trove_info in troves:
+<tr>
+<td>{{trove_info['trovehost']}}</td>
+<td>{{trove_info['ls_due_nice']}}</td>
+</tr>
+% end
+</table>
+
+ <h2>Currently running jobs</h2>
+
+% if len(run_queue) == 0:
+<p>There are no jobs running at this time.</p>
+% else:
+<table>
+<tr>
+<th>Job ID</th>
+<th>path</th>
+</tr>
+% for spec in run_queue:
+% if spec['running_job'] is not None:
+<tr>
+<td><a href="/1.0/job-html/{{spec['running_job']}}">{{spec['running_job']}}</a></td>
+<td><a href="/1.0/lorry-html/{{spec['path']}}">{{spec['path']}}</a></td>
+</tr>
+% end
+% end
+</table>
+% end
+
+<p>See separate list of <a href="/1.0/list-jobs-html">all jobs that
+ have ever been started</a>.</p>
+
+ <h2>Run-queue</h2>
+
+<table>
+<tr>
+<th>Pos</th>
+<th>Path</th>
+<th>Interval</th>
+<th>Due</th>
+<th>Job?</th>
+</tr>
+% for i, spec in enumerate(run_queue):
+% obj = json.loads(spec['text'])
+% name = obj.keys()[0]
+% fields = obj[name]
+<tr>
+<td>{{i+1}}</td>
+<td><a href="/1.0/lorry-html/{{spec['path']}}">{{spec['path']}}</a></td>
+<td>{{spec['interval_nice']}}</td>
+<td>{{spec['due_nice']}}</td>
+% if spec['running_job']:
+<td><a href="/1.0/job-html/{{spec['running_job']}}">{{spec['running_job']}}</a></td>
+% else:
+<td></td>
+% end
+</tr>
+% end
+</table>
+
+ <hr />
+
+ <p>Scotty says: {{quote}}</p>
+
+ <p>Updated: {{timestamp}}</p>
+
+ </body>
+</html>
diff --git a/test-wait-for-port b/test-wait-for-port
new file mode 100755
index 0000000..22e07be
--- /dev/null
+++ b/test-wait-for-port
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+'''Wait for a given port to be open.
+
+WARNING: This may wait for quite a long time. There is no timeout. Or
+spoon.
+
+'''
+
+ import sys, socket, errno, time
+
+ # Usage: test-wait-for-port HOST PORT
+ host = sys.argv[1]
+ port = int(sys.argv[2])
+
+ while True:
+     # Parenthesised single-argument print works as a statement and
+     # as a function call.
+     print("Trying %s port %s" % (host, port))
+     s = socket.socket()
+     try:
+         s.connect((host, port))
+     except socket.error as e:
+         # Only "connection refused" means "not listening yet";
+         # anything else is a real error.
+         if e.errno != errno.ECONNREFUSED:
+             raise
+         # Pause briefly so the retry loop does not spin at full
+         # speed while the port is still closed.
+         time.sleep(0.1)
+     else:
+         break
+     finally:
+         # Close the socket on every path, including refused attempts.
+         s.close()
diff --git a/units/lighttpd-lorry-controller-webapp.service b/units/lighttpd-lorry-controller-webapp.service
new file mode 100644
index 0000000..0c04fac
--- /dev/null
+++ b/units/lighttpd-lorry-controller-webapp.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Lighttpd Web Server for Lorry Controller WEBAPP
+After=network.target
+
+[Service]
+PermissionsStartOnly=true
+User=lorry
+Group=lorry
+ExecStartPre=/usr/bin/install -d -o lorry -g lorry /run/lighttpd-lorry
+ExecStart=/usr/sbin/lighttpd -f /etc/lighttpd/lorry-controller-webapp-httpd.conf -D
+ExecStopPost=/bin/rm -r /run/lighttpd-lorry
+Restart=always
diff --git a/units/lorry-controller-ls-troves.service b/units/lorry-controller-ls-troves.service
new file mode 100644
index 0000000..fe97811
--- /dev/null
+++ b/units/lorry-controller-ls-troves.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=Lorry Controller ls-troves
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/curl -o /dev/null -X POST --data "" http://localhost:12765/1.0/ls-troves
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-ls-troves.timer b/units/lorry-controller-ls-troves.timer
new file mode 100644
index 0000000..dbd157d
--- /dev/null
+++ b/units/lorry-controller-ls-troves.timer
@@ -0,0 +1,6 @@
+[Unit]
+Description=Lorry Controller ls-troves
+After=lighttpd-lorry-controller-webapp.service
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/units/lorry-controller-minion@.service b/units/lorry-controller-minion@.service
new file mode 100644
index 0000000..b63d996
--- /dev/null
+++ b/units/lorry-controller-minion@.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=Lorry Controller MINION %i
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/lorry-controller-minion --config /etc/lorry-controller/minion.conf
+Restart=always
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-readconf.service b/units/lorry-controller-readconf.service
new file mode 100644
index 0000000..1f73b46
--- /dev/null
+++ b/units/lorry-controller-readconf.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=Lorry Controller read config at startup
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/curl -o /dev/null -X POST --data "" http://localhost:12765/1.0/read-configuration
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-readconf.timer b/units/lorry-controller-readconf.timer
new file mode 100644
index 0000000..7e4f04e
--- /dev/null
+++ b/units/lorry-controller-readconf.timer
@@ -0,0 +1,6 @@
+[Unit]
+Description=Lorry Controller read config at startup
+After=lighttpd-lorry-controller-webapp.service
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/units/lorry-controller-status.service b/units/lorry-controller-status.service
new file mode 100644
index 0000000..381677b
--- /dev/null
+++ b/units/lorry-controller-status.service
@@ -0,0 +1,9 @@
+[Unit]
+Description=Lorry Controller Status update
+After=lighttpd-lorry-controller-webapp.service
+
+[Service]
+ExecStart=/usr/bin/curl -o /dev/null http://localhost:12765/1.0/status
+Restart=no
+User=lorry
+Group=lorry
diff --git a/units/lorry-controller-status.timer b/units/lorry-controller-status.timer
new file mode 100644
index 0000000..1528b8c
--- /dev/null
+++ b/units/lorry-controller-status.timer
@@ -0,0 +1,6 @@
+[Unit]
+Description=Lorry Controller Status update
+After=lighttpd-lorry-controller-webapp.service
+
+[Timer]
+OnUnitInactiveSec=60
diff --git a/yarns.webapp/010-introduction.yarn b/yarns.webapp/010-introduction.yarn
new file mode 100644
index 0000000..ae3af58
--- /dev/null
+++ b/yarns.webapp/010-introduction.yarn
@@ -0,0 +1,77 @@
+% Lorry Controller WEBAPP integration test suite
+% Codethink Ltd
+
+
+Introduction
+============
+
+This is an integration test suite for the WEBAPP component of Lorry
+Controller. It is implemented using the [yarn] tool and uses a style
+of automated testing called "scenario testing" by the tool authors.
+
+[yarn]: http://liw.fi/cmdtest/README.yarn/
+
+As an example, here is a scenario that verifies that the Lorry
+Controller WEBAPP can be started at all:
+
+ SCENARIO WEBAPP can be started at all
+ WHEN WEBAPP --help is requested
+ THEN WEBAPP --help exited with a zero exit code
+
+A scenario consists of a sequence of steps that can be executed by a
+computer. The steps are then defined using IMPLEMENTS:
+
+ IMPLEMENTS WHEN WEBAPP --help is requested
+ if "$SRCDIR/lorry-controller-webapp" --help
+ then
+ exit=0
+ else
+ exit=$?
+ fi
+ echo "$exit" > "$DATADIR/webapp.exit"
+
+And another:
+
+ IMPLEMENTS THEN WEBAPP --help exited with a zero exit code
+ grep -Fx 0 "$DATADIR/webapp.exit"
+
+Yarn will run each scenario in the order it finds them. If all steps
+in a scenario succeed, the scenario succeeds.
+
+Scenarios, though not their implementations, are intended to be
+understandable by people who aren't programmers, though some
+understanding of the technology is required.
+
+For more information, see the documentation for yarn.
+
+
+Test environment and setup
+==========================
+
+In this chapter, we discuss how the environment is set up for tests to
+run in. Yarn provides a temporary directory in which tests can create
+temporary directories, and sets the environment variable `$DATADIR` to
+point at that directory. Yarn also deletes the directory and all of
+its contents at the end, so the test suite itself does not need to do
+that.
+
+We put several files into `$DATADIR`.
+
+* The WEBAPP STATEDB database file.
+* Responses from HTTP queries to WEBAPP.
+* PID of the running WEBAPP.
+
+The purpose of each file is documented with the IMPLEMENTS sections
+that use it, typically with the one that creates it.
+
+Since many scenarios will start an instance of WEBAPP, they also need
+to make sure it gets killed. There are steps for these (`GIVEN a
+running WEBAPP` and `FINALLY WEBAPP is terminated`), which MUST be
+used as a pair in each scenario: having only one of these steps is
+always a bug in the scenario, whereas having neither is OK.
+
+ WEBAPP stores its persistent state in STATEDB, which is an Sqlite
+database on disk. Our tests do _not_ touch it directly, only via WEBAPP,
+so that we do not encode in our tests internals of the database, such
+as the database schema. We do not care: we only care that WEBAPP
+works, and the database schema of STATEDB is _not_ a public interface.
diff --git a/yarns.webapp/020-status.yarn b/yarns.webapp/020-status.yarn
new file mode 100644
index 0000000..5749920
--- /dev/null
+++ b/yarns.webapp/020-status.yarn
@@ -0,0 +1,27 @@
+WEBAPP status reporting
+=======================
+
+WEBAPP reports its status via an HTTP request. We verify that when it
+starts up, the status is that it is doing nothing: there are no jobs,
+it has no Lorry or Trove specs.
+
+ SCENARIO WEBAPP is idle when it starts
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status
+ THEN response is application/json
+ AND response has running_queue set to true
+ AND response has disk_free set
+ AND response has disk_free_mib set
+ AND response has disk_free_gib set
+ AND static status page got updated
+ FINALLY WEBAPP terminates
+
+As an alternative, we can request the HTML rendering of the status
+directly with `/1.0/status-html`.
+
+ SCENARIO WEBAPP provide HTML status directly
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status-html
+ THEN response is text/html
+ AND static status page got updated
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/030-queue-management.yarn b/yarns.webapp/030-queue-management.yarn
new file mode 100644
index 0000000..91a8511
--- /dev/null
+++ b/yarns.webapp/030-queue-management.yarn
@@ -0,0 +1,106 @@
+Run queue management
+====================
+
+This chapter contains tests meant for managing the run-queue in
+WEBAPP.
+
+Start and stop job scheduling
+-----------------------------
+
+The administrator needs to be able to stop WEBAPP from scheduling any
+new jobs, and later to start it again.
+
+ SCENARIO admin can start and stop WEBAPP job scheduling
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status
+ THEN response has running_queue set to true
+
+ WHEN admin makes request POST /1.0/stop-queue with dummy=value
+ AND admin makes request GET /1.0/status
+ THEN response has running_queue set to false
+
+Further, the state change needs to be persistent across WEBAPP
+instances, so we kill the WEBAPP that's currently running, and start a
+new one, and verify that the `running_queue` status is still `false`.
+
+ WHEN WEBAPP is terminated
+ THEN WEBAPP isn't running
+
+ GIVEN a running WEBAPP
+ WHEN admin makes request GET /1.0/status
+ THEN response has running_queue set to false
+
+Start the queue again.
+
+ WHEN admin makes request POST /1.0/start-queue with dummy=value
+ AND admin makes request GET /1.0/status
+ THEN response has running_queue set to true
+
+Finally, clean up.
+
+ FINALLY WEBAPP terminates
+
+
+Read CONFGIT
+------------
+
+We need to be able to get Lorry Controller, specifically WEBAPP, to
+update its configuration and run-queue from CONFGIT using the
+`/1.0/read-configuration` HTTP API request.
+
+First, set up WEBAPP.
+
+ SCENARIO WEBAPP updates its configuration from CONFGIT
+ GIVEN a new git repository in CONFGIT
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+We'll start with an empty configuration. This is the default state
+when WEBAPP has never read its configuration.
+
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Make WEBAPP read an empty configuration. Or rather, a configuration
+that does not match any existing `.lorry` files.
+
+ GIVEN an empty lorry-controller.conf in CONFGIT
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` file, with one Lorry spec, and make sure reading the
+configuration makes `/list-queue` report it.
+
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo"]
+
+If the `.lorry` file is removed, the queue should again become empty.
+
+ GIVEN file CONFGIT/foo.lorry is removed
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add two Lorries, then make sure they can be reordered at will.
+
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND Lorry file CONFGIT/bar.lorry with {"bar":{"type":"git","url":"git://bar"}}
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/bar", "upstream/foo"]
+
+ WHEN admin makes request POST /1.0/move-to-top with path=upstream/foo
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo", "upstream/bar"]
+
+ WHEN admin makes request POST /1.0/move-to-bottom with path=upstream/foo
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/bar", "upstream/foo"]
+
+Finally, clean up.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/040-running-jobs.yarn b/yarns.webapp/040-running-jobs.yarn
new file mode 100644
index 0000000..1ffe79d
--- /dev/null
+++ b/yarns.webapp/040-running-jobs.yarn
@@ -0,0 +1,260 @@
+Running jobs
+============
+
+This chapter contains tests that verify that WEBAPP schedules jobs,
+accepts job output, and lets the admin kill running jobs.
+
+Run a job successfully
+----------------------
+
+To start with, with an empty run-queue, nothing should be scheduled.
+
+ SCENARIO run a job
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+We stop the queue first.
+
+ WHEN admin makes request POST /1.0/stop-queue with dummy=value
+
+Then make sure we don't get a job when we request one.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to null
+
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to []
+
+Add a Lorry spec to the run-queue, and request a job. We still
+shouldn't get a job, since the queue isn't set to run yet.
+
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to null
+
+Enable the queue, and off we go.
+
+ WHEN admin makes request POST /1.0/start-queue with dummy=value
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+ AND response has path set to "upstream/foo"
+
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has running_job set to 1
+
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to [1]
+
+Requesting another job should now again return null.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to null
+
+Inform WEBAPP the job is finished.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ THEN response has kill_job set to false
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has running_job set to null
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
+
+
+Limit number of jobs running at the same time
+---------------------------------------------
+
+WEBAPP can be told to limit the number of jobs running at the same
+time.
+
+Set things up. Note that we have two local Lorry files, so that we
+could, in principle, run two jobs at the same time.
+
+ SCENARIO limit concurrent jobs
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND Lorry file CONFGIT/bar.lorry with {"bar":{"type":"git","url":"git://bar"}}
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+
+Check the current value of the `max_jobs` setting.
+
+ WHEN admin makes request GET /1.0/get-max-jobs
+ THEN response has max_jobs set to null
+
+Set the limit to 1.
+
+ WHEN admin makes request POST /1.0/set-max-jobs with max_jobs=1
+ THEN response has max_jobs set to 1
+ WHEN admin makes request GET /1.0/get-max-jobs
+ THEN response has max_jobs set to 1
+
+Get a job. This should succeed.
+
+ WHEN MINION makes request POST /1.0/give-me-job with host=testhost&pid=1
+ THEN response has job_id set to 1
+
+Get a second job. This should not succeed.
+
+ WHEN MINION makes request POST /1.0/give-me-job with host=testhost&pid=2
+ THEN response has job_id set to null
+
+Finish the first job. Then get a new job. This should succeed.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ AND MINION makes request POST /1.0/give-me-job with host=testhost&pid=2
+ THEN response has job_id set to 2
+
+Cleanup. This scenario starts a WEBAPP, so it must also terminate it.
+
+ FINALLY WEBAPP terminates
+
+Stop job in the middle
+----------------------
+
+We need to be able to stop jobs while they're running as well. We
+start by setting up everything so that a job is running, the same way
+we did for the successful job scenario.
+
+ SCENARIO stop a job while it's running
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request POST /1.0/start-queue with dummy=value
+ AND admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+ AND response has path set to "upstream/foo"
+
+Admin will now ask WEBAPP to kill the job. This only sets a field in
+the STATEDB; the job itself is not killed yet.
+
+ WHEN admin makes request POST /1.0/stop-job with job_id=1
+ AND admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has kill_job set to true
+
+Now, when MINION updates the job, WEBAPP will tell it to kill it.
+MINION will do so, and then update the job again.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no
+ THEN response has kill_job set to true
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=1
+
+Admin will now see that the job has, indeed, been killed.
+
+ WHEN admin makes request GET /1.0/lorry/upstream/foo
+ THEN response has running_job set to null
+
+ WHEN admin makes request GET /1.0/list-running-jobs
+ THEN response has running_jobs set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
+
+Stop a job that runs too long
+-----------------------------
+
+Sometimes a job gets "stuck" and should be killed. The
+`lorry-controller.conf` has an optional `lorry-timeout` field for
+this, to set the timeout, and WEBAPP will tell MINION to kill a job
+when it has been running too long.
+
+Some setup. Set the `lorry-timeout` to a known value. It doesn't
+matter what it is since we'll be telling WEBAPP to fake its sense of
+time, so that the test suite is not timing sensitive. We wouldn't want
+to have the test suite fail when running on slow devices.
+
+ SCENARIO stop stuck job
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND lorry-controller.conf in CONFGIT has lorry-timeout set to 1 for everything
+ AND Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+
+Pretend it is the start of time.
+
+ WHEN admin makes request POST /1.0/pretend-time with now=0
+ WHEN admin makes request GET /1.0/status
+ THEN response has timestamp set to "1970-01-01 00:00:00 UTC"
+
+Start the job.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+
+Check that the job info contains a start time.
+
+ WHEN admin makes request GET /1.0/job/1
+ THEN response has job_started set
+
+Pretend it is now much later, or at least later than the timeout specified.
+
+ WHEN admin makes request POST /1.0/pretend-time with now=2
+
+Pretend to be a MINION that reports an update on the job. WEBAPP
+should now be telling us to kill the job.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=no
+ THEN response has kill_job set to true
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
+
+Remove a terminated job
+-----------------------
+
+WEBAPP doesn't remove jobs automatically, it needs to be told to
+remove jobs.
+
+ SCENARIO remove job
+
+Setup.
+
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+ GIVEN Lorry file CONFGIT/foo.lorry with {"foo":{"type":"git","url":"git://foo"}}
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+
+Start job 1.
+
+ WHEN admin makes request POST /1.0/give-me-job with host=testhost&pid=123
+ THEN response has job_id set to 1
+
+Try to remove job 1 while it is running. This should fail.
+
+ WHEN admin makes request POST /1.0/remove-job with job_id=1
+ THEN response has reason set to "still running"
+
+Finish the job.
+
+ WHEN MINION makes request POST /1.0/job-update with job_id=1&exit=0
+ WHEN admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to [1]
+
+Remove it.
+
+ WHEN admin makes request POST /1.0/remove-job with job_id=1
+ AND admin makes request GET /1.0/list-jobs
+ THEN response has job_ids set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/050-troves.yarn b/yarns.webapp/050-troves.yarn
new file mode 100644
index 0000000..8737306
--- /dev/null
+++ b/yarns.webapp/050-troves.yarn
@@ -0,0 +1,76 @@
+Handling of remote Troves
+=========================
+
+This chapter has tests for WEBAPP's handling of remote Troves: getting
+the listing of repositories to mirror from the Trove, and creating
+entries in the run-queue for them.
+
+
+Reading a remote Trove specification from CONFGIT
+-------------------------------------------------
+
+When there's a `troves` section in the Lorry Controller configuration
+file, the WEBAPP should include that in the list of Troves when
+reported.
+
+ SCENARIO a Trove is listed in CONFGIT
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND WEBAPP uses CONFGIT as its configuration directory
+
+Note that we need to fake a remote Trove, using static files, to keep
+test setup simpler.
+
+ AND WEBAPP fakes Trove example-trove
+ AND a running WEBAPP
+
+Initially WEBAPP should report no known Troves, and have an empty
+run-queue.
+
+ WHEN admin makes request GET /1.0/status
+ THEN response has run_queue set to []
+ AND response has troves set to []
+
+Let's add a `troves` section to the configuration file. After WEBAPP
+reads that, it should list the added Trove in status.
+
+ GIVEN lorry-controller.conf in CONFGIT adds trove example-trove
+ AND lorry-controller.conf in CONFGIT has prefixmap example:example for example-trove
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request GET /1.0/status
+ THEN response has troves item 0 field trovehost set to "example-trove"
+
+However, this should not have made WEBAPP fetch a new list of
+repositories from the remote Trove.
+
+ AND response has run_queue set to []
+
+If we tell WEBAPP to fetch the list, we should see repositories.
+
+ GIVEN remote Trove example-trove has repository example/foo
+ WHEN admin makes request POST /1.0/ls-troves with dummy=value
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["example/foo"]
+
+If we re-read the configuration again, without any changes to it or to
+the fake Trove's repository list, the same Troves and Lorry specs
+should remain in STATEDB. (It wasn't always thus, due to a bug.)
+
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ AND admin makes request GET /1.0/status
+ THEN response has troves item 0 field trovehost set to "example-trove"
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["example/foo"]
+
+If the Trove deletes a repository, we should still keep it locally, to
+avoid disasters. However, it will be removed from the Trove's STATEDB,
+and it won't be lorried anymore.
+
+ GIVEN remote Trove example-trove doesn't have repository example/foo
+ WHEN admin makes request POST /1.0/ls-troves with dummy=value
+ AND admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Cleanup.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/060-validation.yarn b/yarns.webapp/060-validation.yarn
new file mode 100644
index 0000000..989c80b
--- /dev/null
+++ b/yarns.webapp/060-validation.yarn
@@ -0,0 +1,190 @@
+Validation of CONFGIT
+=====================
+
+The CONFGIT repository contains two types of files we should validate:
+the `lorry-controller.conf` file, and the local Lorry files (specified
+by the former file in `lorries` sections).
+
+Validate `lorry-controller.conf`
+--------------------------------
+
+We'll start by validating the `lorry-controller.conf` file. There are
+several aspects here that need to be tested:
+
+* JSON syntax correctness: if the file doesn't parse as JSON, the
+ WEBAPP should cope and shouldn't change STATEDB in any way.
+* Semantic correctness: the file should contain a list of dicts, and
+ each dict should have the right fields with the right kind of
+ values. See the `README` for details. Other fields are also allowed,
+ though ignored. Again, if there's an error, WEBAPP should cope, and
+ probably shouldn't update STATEDB if there are any problems.
+
+The approach for testing this is to set up an empty STATEDB, then get
+WEBAPP to read a `lorry-controller.conf` with various kinds of
+brokenness, and after each read verify that STATEDB is still empty.
+This doesn't test that if the STATEDB wasn't empty it doesn't change
+existing data, but it seems like a reasonable assumption that an
+update happens regardless of previous contents of STATEDB, given how
+SQL transactions work.
+
+In summary:
+
+* Start WEBAPP without a STATEDB, and have it read its config. Verify
+ STATEDB is empty.
+* Add a `lorry-controller.conf` that is broken in some specific way.
+* Tell WEBAPP to re-read its config.
+* Verify that WEBAPP gives an error message.
+* Verify that STATEDB is still empty.
+
+Repeat this for each type of brokenness we want to ensure WEBAPP
+validates for.
+
+ SCENARIO validate lorry-controller.conf
+ GIVEN a new git repository in CONFGIT
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+First of all, have WEBAPP read CONFGIT. This should succeed even if
+the `lorry-controller.conf` file doesn't actually exist.
+
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "Configuration has been updated"
+ AND STATEDB is empty
+
+Add an empty configuration file. This is different from a file
+containing an empty JSON list. It should be treated as an error.
+
+ GIVEN a lorry-controller.conf in CONFGIT containing ""
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+Add a syntactically invalid JSON file.
+
+ GIVEN a lorry-controller.conf in CONFGIT containing "blah blah blah"
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+Replace the bad JSON file with one that has an unknown section (no
+`type` field). Please excuse the non-escaping of double quotes: it's
+an artifact of how yarn steps are implemented and is OK.
+
+ GIVEN a lorry-controller.conf in CONFGIT containing "[{"foo": "bar"}]"
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+What about a section that has a `type` field, but it's set to a
+nonsensical value?
+
+ GIVEN a lorry-controller.conf in CONFGIT containing "[{"type": "BACKUPS!"}]"
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "ERROR"
+ AND STATEDB is empty
+
+Now we're getting to real sections. A `troves` section must have
+`trovehost`, `interval`, `ls-interval`, and `prefixmap` set, and may
+optionally have `ignore` set. The `trovehost` field can't really be
+checked, and `interval` and `ls-interval` don't need much checking: if
+they don't parse as sensible intervals, Lorry Controller will just use
+a default value.
+
+`prefixmap`, however, can have a reasonable check: it shouldn't map
+something to be under the Trove ID of the local Trove, otherwise Lorry
+won't be able to push the repositories. However, at this time, we do
+not have a reasonable way to get the Trove ID of the local Trove, so
+we're skipping implementing that test for now. (FIXME: fix this lack
+of testing.)
+
+Clean up at the end.
+
+ FINALLY WEBAPP terminates
+
+
+Validate local Lorry files
+--------------------------
+
+Lorry files (`.lorry`) are consumed by the Lorry program itself, but
+also by Lorry Controller. In fact, the ones that are in CONFGIT are
+only consumed by Lorry Controller: it reads them in, parses them,
+extracts the relevant information, puts that into STATEDB, and then
+generates a whole new (temporary) file for each Lorry run.
+
+Lorry Controller doesn't validate the Lorry files much, only
+enough that it can extract each separate Lorry specification and feed
+them to Lorry one by one. In other words:
+
+* The `.lorry` file must be valid JSON.
+* It must be a dict.
+* Each key must map to another dict.
+* Each inner dict must have a key `type`, which maps to a string.
+
+Everything else is left for Lorry itself. Lorry Controller only needs
+to handle Lorry not working, and it already does that.
+
+Firstly, some setup.
+
+ SCENARIO validate .lorry files
+ GIVEN a new git repository in CONFGIT
+ AND an empty lorry-controller.conf in CONFGIT
+ AND lorry-controller.conf in CONFGIT adds lorries *.lorry using prefix upstream
+ AND WEBAPP uses CONFGIT as its configuration directory
+ AND a running WEBAPP
+
+Make sure WEBAPP handles there not being any `.lorry` files.
+
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` file that contains broken JSON.
+
+ GIVEN Lorry file CONFGIT/notjson.lorry with THIS IS NOT JSON
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` file that is valid JSON, but is not a dict.
+
+ GIVEN Lorry file CONFGIT/notadict.lorry with [1,2,3]
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` that is a dict, but doesn't map keys to dicts.
+
+ GIVEN Lorry file CONFGIT/notadictofdicts.lorry with { "foo": 1 }
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` whose inner dict does not have a `type` field.
+
+ GIVEN Lorry file CONFGIT/notype.lorry with { "foo": { "bar": "yo" }}
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "has been updated"
+ AND STATEDB is empty
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to []
+
+Add a `.lorry` that is A-OK. This should work even when there are some
+broken ones too.
+
+ GIVEN Lorry file CONFGIT/a-ok.lorry with { "foo": { "type": "git", "url": "git://example.com/foo" }}
+ WHEN admin makes request POST /1.0/read-configuration with dummy=value
+ THEN response matches "has been updated"
+ WHEN admin makes request GET /1.0/list-queue
+ THEN response has queue set to ["upstream/foo"]
+
+Clean up at the end.
+
+ FINALLY WEBAPP terminates
diff --git a/yarns.webapp/900-implementations.yarn b/yarns.webapp/900-implementations.yarn
new file mode 100644
index 0000000..4f87be9
--- /dev/null
+++ b/yarns.webapp/900-implementations.yarn
@@ -0,0 +1,484 @@
+Implementations
+===============
+
+This chapter includes IMPLEMENTS sections for the various steps used
+in scenarios.
+
+Managing a WEBAPP instance
+--------------------------
+
+We're testing a web application (conveniently named WEBAPP, though
+the executable is `lorry-controller-webapp`), so we need to be able to
+start it and stop it in scenarios. We start it as a background
+process, and keep its PID in `$DATADIR/webapp.pid`. When it's time to
+kill it, we kill the process with the PID in that file. This is not
+perfect, though it's good enough for our purposes. It doesn't handle
+running multiple instances at the same time, which we don't need, and
+doesn't handle the case of the process dying and the kernel re-using
+the PID for something else, which is quite unlikely.
+
+Start an instance of the WEBAPP, using a random port. Record the PID
+and the port. Listen only on localhost. We use `start-stop-daemon` to
+start the process, so that it can keep running in the background,
+but the shell doesn't wait for it to terminate. This way, WEBAPP will
+be running until it crashes or is explicitly killed.
+
+ IMPLEMENTS GIVEN a running WEBAPP
+ rm -f "$DATADIR/webapp.pid"
+ rm -f "$DATADIR/webapp.port"
+ mkfifo "$DATADIR/webapp.port"
+
+ add_to_config_file "$DATADIR/webapp.conf" \
+ statedb "$DATADIR/webapp.db"
+ add_to_config_file "$DATADIR/webapp.conf" \
+ status-html "$DATADIR/lc-status.html"
+ add_to_config_file "$DATADIR/webapp.conf" \
+ log "$DATADIR/webapp.log"
+ add_to_config_file "$DATADIR/webapp.conf" \
+ log-level debug
+ add_to_config_file "$DATADIR/webapp.conf" \
+ debug-host 127.0.0.1
+ add_to_config_file "$DATADIR/webapp.conf" \
+ debug-port-file "$DATADIR/webapp.port"
+ add_to_config_file "$DATADIR/webapp.conf" \
+ static-files "$SRCDIR/static"
+ add_to_config_file "$DATADIR/webapp.conf" \
+ templates "$SRCDIR/templates"
+ add_to_config_file "$DATADIR/webapp.conf" \
+ debug-real-confgit no
+
+ start-stop-daemon -S -x "$SRCDIR/lorry-controller-webapp" \
+ -b -p "$DATADIR/webapp.pid" -m --verbose \
+ -- \
+ --config "$DATADIR/webapp.conf"
+
+ port=$(cat "$DATADIR/webapp.port")
+ rm -f "$DATADIR/webapp.port"
+ echo "$port" >"$DATADIR/webapp.port"
+
+ # Wait for the WEBAPP to actually be ready, i.e., that it's
+ # listening on its assigned port.
+ "$SRCDIR/test-wait-for-port" 127.0.0.1 "$port"
+
+Kill the running WEBAPP, using the recorded PID. We need to do this
+both as a WHEN and a FINALLY step.
+
+ IMPLEMENTS WHEN WEBAPP is terminated
+ kill_daemon_using_pid_file "$DATADIR/webapp.pid"
+
+ IMPLEMENTS FINALLY WEBAPP terminates
+ kill_daemon_using_pid_file "$DATADIR/webapp.pid"
+
+Also test that WEBAPP isn't running.
+
+ IMPLEMENTS THEN WEBAPP isn't running
+ # Fail the step if the process recorded in the PID file still exists.
+ # kill -0 sends no signal; it only reports whether the PID can be
+ # signalled.
+ webapp_pid=$(head -n1 "$DATADIR/webapp.pid")
+ if kill -0 "$webapp_pid"; then
+     echo "process $webapp_pid is still running, but shouldn't be" >&2
+     exit 1
+ fi
+
+Managing Lorry Controller configuration
+---------------------------------------
+
+We need to be able to create, and change, the `lorry-controller.conf`
+file, and other files, in CONFGIT. First of all, we need to create
+CONFGIT.
+
+ IMPLEMENTS GIVEN a new git repository in (\S+)
+ # MATCH_1 is the directory name captured by the step pattern; the
+ # repository is created under $DATADIR.
+ git init "$DATADIR/$MATCH_1"
+
+Then we need to create an empty `lorry-controller.conf` file there.
+This is not just an empty file, it must be a JSON file that contains
+an empty list object.
+
+ IMPLEMENTS GIVEN an empty lorry-controller.conf in (\S+)
+ # Write an empty JSON list, overwriting any existing file. MATCH_1 is
+ # the directory under $DATADIR.
+ printf '[]\n' > "$DATADIR/$MATCH_1/lorry-controller.conf"
+
+Set the contents of `lorry-controller.conf` from a textual form.
+
+ IMPLEMENTS GIVEN a lorry-controller.conf in (\S+) containing "(.*)"$
+ # MATCH_2 is written verbatim, followed by a newline, with no JSON
+ # validation. The greedy (.*) anchored by the closing "$ lets the
+ # contents themselves contain double quotes.
+ printf '%s\n' "$MATCH_2" > "$DATADIR/$MATCH_1/lorry-controller.conf"
+
+Add a `.lorry` file to be used by a `lorry-controller.conf`.
+
+ IMPLEMENTS GIVEN Lorry file (\S+) with (.*)
+ # MATCH_1 is a path relative to $DATADIR; MATCH_2 (the rest of the step
+ # line) is written verbatim, followed by a newline. The contents are
+ # not checked here.
+ printf '%s\n' "$MATCH_2" > "$DATADIR/$MATCH_1"
+
+Remove a file. This is actually quite generic, but it's relevant to us
+for `.lorry` files only (when this is being written).
+
+ IMPLEMENTS GIVEN file (\S+) is removed
+ # MATCH_1 is a path relative to $DATADIR. Plain rm (no -f) exits
+ # non-zero when the file does not exist.
+ rm "$DATADIR/$MATCH_1"
+
+Add a `lorries` section to a `lorry-controller.conf`. This hardcodes
+most of the configuration.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) adds lorries (\S+) using prefix (\S+)
+ # Append a "lorries" section to an existing JSON configuration file.
+ # MATCH_1 = file name, MATCH_2 = directory under $DATADIR,
+ # MATCH_3 = glob for Lorry files, MATCH_4 = prefix.
+ # The file must already contain a JSON list.
+ python -c '
+ import json
+ import os
+
+ env = os.environ
+ conf_path = os.path.join(env["DATADIR"], env["MATCH_2"], env["MATCH_1"])
+
+ lorries_section = {
+     "type": "lorries",
+     "interval": "0s",
+     "prefix": env["MATCH_4"],
+     "globs": [
+         env["MATCH_3"],
+     ],
+ }
+
+ with open(conf_path, "r") as stream:
+     sections = json.load(stream)
+ sections.append(lorries_section)
+ with open(conf_path, "w") as stream:
+     json.dump(sections, stream)
+ '
+
+Add a `troves` section to `lorry-controller.conf`. Again, we hardcode
+most of the configuration.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) adds trove (\S+)
+ # Append a "troves" section with hardcoded settings to an existing JSON
+ # configuration file. MATCH_1 = file name, MATCH_2 = directory under
+ # $DATADIR, MATCH_3 = trovehost. The file must already contain a JSON
+ # list.
+ python -c '
+ import json
+ import os
+
+ env = os.environ
+ conf_path = os.path.join(env["DATADIR"], env["MATCH_2"], env["MATCH_1"])
+
+ trove_section = {
+     "type": "troves",
+     "trovehost": env["MATCH_3"],
+     "protocol": "ssh",
+     "interval": "0s",
+     "ls-interval": "0s",
+     "prefixmap": {},
+     "ignore": [],
+ }
+
+ with open(conf_path, "r") as stream:
+     sections = json.load(stream)
+ sections.append(trove_section)
+ with open(conf_path, "w") as stream:
+     json.dump(sections, stream, indent=4)
+ '
+
+Set a specific field for all sections in a `lorry-controller.conf`
+file.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) has (\S+) set to (.+) for everything
+ # Set one field in every section of a JSON configuration file.
+ # MATCH_1 = file name, MATCH_2 = directory under $DATADIR,
+ # MATCH_3 = field name, MATCH_4 = new value as JSON text.
+ python -c '
+ import json
+ import os
+
+ env = os.environ
+ conf_path = os.path.join(env["DATADIR"], env["MATCH_2"], env["MATCH_1"])
+ field = env["MATCH_3"]
+ raw_value = env["MATCH_4"]
+
+ with open(conf_path, "r") as stream:
+     sections = json.load(stream)
+
+ # Parse the value once per section so sections never share one object.
+ for section in sections:
+     section[field] = json.loads(raw_value)
+
+ with open(conf_path, "w") as stream:
+     json.dump(sections, stream, indent=4)
+ '
+
+Set a specific field for a `troves` section.
+
+ IMPLEMENTS GIVEN (\S+) in (\S+) sets (\S+) to (\S+) for trove (\S+)
+ # Set one field in the section(s) describing a given Trove.
+ # MATCH_1 = file name, MATCH_2 = directory under $DATADIR,
+ # MATCH_3 = field name, MATCH_4 = new value as JSON text,
+ # MATCH_5 = trovehost of the section to change.
+ python -c '
+ import os
+ import json
+
+ DATADIR = os.environ["DATADIR"]
+ MATCH_1 = os.environ["MATCH_1"]
+ MATCH_2 = os.environ["MATCH_2"]
+ MATCH_3 = os.environ["MATCH_3"]
+ # Each capture group is read from its own variable; reading MATCH_3 for
+ # all three would compare the trovehost against the field name.
+ MATCH_4 = os.environ["MATCH_4"]
+ MATCH_5 = os.environ["MATCH_5"]
+
+ filename = os.path.join(DATADIR, MATCH_2, MATCH_1)
+
+ with open(filename, "r") as f:
+     obj = json.load(f)
+
+ # Only Trove sections have a trovehost; other section types are skipped.
+ for section in obj:
+     if section["type"] in ["trove", "troves"]:
+         if section["trovehost"] == MATCH_5:
+             section[MATCH_3] = json.loads(MATCH_4)
+
+ with open(filename, "w") as f:
+     json.dump(obj, f, indent=4)
+ '
+
+Set the prefixmap for a Trove in a Lorry Controller configuration
+file. Note that the Trove must already be in the configuration file.
+
+    IMPLEMENTS GIVEN (\S+) in (\S+) has prefixmap (\S+):(\S+) for (\S+)
+    python -c '
+    import json, os
+
+    # Maps MATCH_3 to MATCH_4 in the prefixmap of every "troves" section
+    # whose trovehost is MATCH_5. Nothing is added when no section matches.
+    env = os.environ
+    conf_path = os.path.join(env["DATADIR"], env["MATCH_2"], env["MATCH_1"])
+    prefix, target, host = env["MATCH_3"], env["MATCH_4"], env["MATCH_5"]
+
+    with open(conf_path, "r") as conf:
+        sections = json.load(conf)
+
+    for section in sections:
+        if section["type"] != "troves":
+            continue
+        if section["trovehost"] == host:
+            section["prefixmap"][prefix] = target
+
+    with open(conf_path, "w") as conf:
+        json.dump(sections, conf, indent=4)
+    '
+
+We need to be able to tell WEBAPP, when it runs, where the
+configuration directory is.
+
+    IMPLEMENTS GIVEN WEBAPP uses (\S+) as its configuration directory
+    # MATCH_1 is taken relative to $DATADIR and stored in webapp.conf.
+    confdir="$DATADIR/$MATCH_1"
+    add_to_config_file "$DATADIR/webapp.conf" configuration-directory "$confdir"
+
+Make WEBAPP fake access to a Trove using a static file.
+
+    IMPLEMENTS GIVEN WEBAPP fakes Trove (\S+)
+    # The setting value has the form TROVEHOST=FILE, with FILE in $DATADIR.
+    fake_trove="$MATCH_1=$DATADIR/$MATCH_1.trove"
+    add_to_config_file "$DATADIR/webapp.conf" debug-fake-trove "$fake_trove"
+
+Control the ls listing of a remote Trove.
+
+    IMPLEMENTS GIVEN remote Trove (\S+) has repository (\S+)
+    trove_file="$DATADIR/$MATCH_1.trove"
+
+    # Start from an empty JSON object if the fake Trove has no file yet.
+    [ -e "$trove_file" ] || echo "{}" > "$trove_file"
+
+    # Show the contents as they are before the change.
+    cat "$trove_file"
+
+    python -c '
+    import json, os, sys
+
+    repo = os.environ["MATCH_2"]
+    path = sys.argv[1]
+
+    with open(path) as trove:
+        listing = json.load(trove)
+
+    # Append the repository to the ls-output list; a missing list is
+    # treated as empty.
+    repos = listing.get("ls-output", [])
+    listing["ls-output"] = repos + [repo]
+
+    with open(path, "w") as trove:
+        json.dump(listing, trove)
+    ' "$trove_file"
+
+Remove a repository from the fake remote Trove.
+
+    IMPLEMENTS GIVEN remote Trove (\S+) doesn't have repository (\S+)
+    trove_file="$DATADIR/$MATCH_1.trove"
+
+    # Start from an empty JSON object if the fake Trove has no file yet.
+    [ -e "$trove_file" ] || echo "{}" > "$trove_file"
+
+    # Show the contents as they are before the change.
+    cat "$trove_file"
+
+    python -c '
+    import json, os, sys
+
+    repo = os.environ["MATCH_2"]
+    path = sys.argv[1]
+
+    with open(path) as trove:
+        listing = json.load(trove)
+
+    # Drop one occurrence of the repository, if it is listed at all.
+    # The ls-output key is written back even when nothing was removed.
+    repos = listing.get("ls-output", [])
+    if repo in repos:
+        repos.remove(repo)
+    listing["ls-output"] = repos
+
+    with open(path, "w") as trove:
+        json.dump(listing, trove)
+    ' "$trove_file"
+
+Making and analysing HTTP requests
+---------------------------------
+
+Simple HTTP GET and POST requests are simple. We make the request,
+sending a body if given, and capture the response: HTTP status code,
+response headers, response body.
+
+We make the request using the `curl` command line program, which makes
+capturing the response quite convenient.
+
+HTTP requests can be made by various entities. This does not affect
+test code, but allows for nicer scenario steps.
+
+We check that the HTTP status indicates success, so that every
+scenario doesn't need to check that separately.
+
+A GET request:
+
+    IMPLEMENTS WHEN admin makes request GET (\S+)
+    headers="$DATADIR/response.headers"
+    body="$DATADIR/response.body"
+
+    # Truncate (or create) both response files before making the request.
+    : > "$headers"
+    : > "$body"
+    port=$(cat "$DATADIR/webapp.port")
+
+    # The timestamp is needed by "THEN static status page got updated"
+    touch "$DATADIR/request.timestamp"
+
+    curl --silent --show-error \
+        --dump-header "$headers" \
+        --output "$body" \
+        "http://127.0.0.1:$port$MATCH_1"
+
+    # Show the response in the step output, then require a 200 status line.
+    cat "$headers"
+    cat "$body"
+    head -n1 "$headers" | grep '^HTTP/1\.[01] 200 '
+
+A POST request always has a body. The body consists of `foo=bar`
+pairs, separated by `&` signs.
+
+    IMPLEMENTS WHEN (\S+) makes request POST (\S+) with (.*)
+    headers="$DATADIR/response.headers"
+    body="$DATADIR/response.body"
+
+    # Truncate (or create) both response files before making the request.
+    : > "$headers"
+    : > "$body"
+    port=$(cat "$DATADIR/webapp.port")
+
+    # The timestamp is needed by "THEN static status page got updated"
+    touch "$DATADIR/request.timestamp"
+
+    # MATCH_2 is the request path and MATCH_3 the request body.
+    curl --silent --show-error \
+        --dump-header "$headers" \
+        --output "$body" \
+        --request POST \
+        --data "$MATCH_3" \
+        "http://127.0.0.1:$port$MATCH_2"
+
+    # Show the response in the step output, then require a 200 status line.
+    cat "$headers"
+    cat "$body"
+    head -n1 "$headers" | grep '^HTTP/1\.[01] 200 '
+
+Check the Content-Type of the response has the desired type.
+
+    IMPLEMENTS THEN response is (\S+)
+    # Print the saved headers, then look (ignoring case) for a line that
+    # starts with "Content-Type: " followed by the pattern MATCH_1.
+    headers="$DATADIR/response.headers"
+    cat "$headers"
+    grep -i "^Content-Type: $MATCH_1" "$headers"
+
+A JSON response can then be queried further. The JSON is expected to
+be a dict, so that values are accessed by name from the dict. The
+value is expressed as a JSON value in the step.
+
+    IMPLEMENTS THEN response has (\S+) set to (.+)
+    cat "$DATADIR/response.body"
+    python -c '
+    import json, os, sys
+
+    # The response body arrives on stdin; MATCH_1 is the key to look up
+    # and MATCH_2 the expected value written as JSON text.
+    response = json.load(sys.stdin)
+    name = os.environ["MATCH_1"]
+    wanted = json.loads(os.environ["MATCH_2"])
+    actual = response[name]
+
+    complaint = "Key {key} has value {value}, but {expected} was expected"
+    if actual != wanted:
+        sys.stderr.write(
+            complaint.format(key=name, value=actual, expected=wanted))
+        sys.exit(1)
+    ' < "$DATADIR/response.body"
+
+A JSON response may need to be analysed in more depth. Specifically,
+we may need to look at a list of dicts, as below.
+
+    IMPLEMENTS THEN response has (\S+) item (\d+) field (\S+) set to (\S+)
+    cat "$DATADIR/response.body"
+    python -c '
+    import json, os, sys
+
+    # The response body arrives on stdin. MATCH_1 names a list in it,
+    # MATCH_2 is an index into that list, MATCH_3 a field of the indexed
+    # item, and MATCH_4 the expected value of the field as JSON text.
+    # Each intermediate value is printed to help diagnose a failing step.
+    data = json.load(sys.stdin)
+    print "data:", repr(data)
+    items = os.environ["MATCH_1"]
+    print "items:", repr(items)
+    item = int(os.environ["MATCH_2"])
+    print "item:", repr(item)
+    field = os.environ["MATCH_3"]
+    print "field:", repr(field)
+    print "match4:", repr(os.environ["MATCH_4"])
+    expected = json.loads(os.environ["MATCH_4"])
+    print "expected:", repr(expected)
+    print "data[items]:", repr(data[items])
+    print "data[items][item]:", repr(data[items][item])
+    print "data[items][item][field]:", repr(data[items][item][field])
+    value = data[items][item][field]
+    if value != expected:
+        sys.stderr.write(
+            "Item {item} in {items} has field {field} with "
+            "value {value}, but {expected} was expected".format (
+                item=item, items=items, field=field, value=value,
+                expected=expected))
+        sys.exit(1)
+    ' < "$DATADIR/response.body"
+
+In some cases, such as free disk space, we don't care about the actual
+value, but we do care that it is there.
+
+    IMPLEMENTS THEN response has (\S+) set
+    cat "$DATADIR/response.body"
+    python -c '
+    import json, os, sys
+
+    response = json.load(sys.stdin)
+    name = os.environ["MATCH_1"]
+
+    # Only the presence of the key matters here, not its value.
+    if name in response:
+        sys.exit(0)
+    sys.stderr.write(
+        "Key {key} is not set, but was expected to be set".format(key=name))
+    sys.exit(1)
+    ' < "$DATADIR/response.body"
+
+Some responses are just plain text, so we match them with a regexp.
+
+    IMPLEMENTS THEN response matches "(.*)"$
+    # The body is printed first; MATCH_1 is then given to grep as the
+    # pattern to find in it.
+    body="$DATADIR/response.body"
+    cat "$body"
+    grep "$MATCH_1" "$body"
+
+
+Status web page
+---------------
+
+WEBAPP is expected to update a static HTML page whenever the
+`/1.0/status` request is made. We configure WEBAPP to write it to
+`$DATADIR/lc-status.html`. We don't test the contents of the page, but
+we do test that it gets updated. We test for the updates by comparing
+the modification time of the file with the time of the request. We
+know the time of the request thanks to the "WHEN admin makes a
+request" step updating the modification time of a file for this
+purpose.
+
+    IMPLEMENTS THEN static status page got updated
+    # Comparing with test -nt would not do: the two timestamps may be
+    # identical, which is fine on filesystems that only store full-second
+    # timestamps. Instead, stat prints both modification times in
+    # (roughly) ISO 8601 format, and those are compared as plain strings:
+    # the page must be no older than the request.
+
+    page_mtime=$(stat -c %y "$DATADIR/lc-status.html")
+    request_mtime=$(stat -c %y "$DATADIR/request.timestamp")
+    [ "$request_mtime" = "$page_mtime" ] || [ "$request_mtime" '<' "$page_mtime" ]
+
+
+STATEDB
+-------
+
+Check that the STATEDB is empty. This means it should exist, and
+should be initialised, but none of the important tables should have
+any rows in them.
+
+    IMPLEMENTS THEN STATEDB is empty
+    db="$DATADIR/webapp.db"
+
+    # The database file must exist and have a non-zero size...
+    test -s "$db"
+
+    # ...but selecting from either table must print nothing.
+    for table in troves lorries
+    do
+        sqlite3 "$db" "SELECT * FROM $table;" | stdin_is_empty
+    done
diff --git a/yarns.webapp/yarn.sh b/yarns.webapp/yarn.sh
new file mode 100644
index 0000000..3c617e3
--- /dev/null
+++ b/yarns.webapp/yarn.sh
@@ -0,0 +1,56 @@
+# Copyright (C) 2013 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =*= License: GPL-2 =*=
+
+# This file is a yarn shell library for testing Lorry Controller.
+
+
+# Kill a daemon given its pid file. Report whether it got killed or not.
+# The pid is the first line of the file "$1"; the daemon is sent SIGKILL.
+kill_daemon_using_pid_file()
+{
+    local pid=$(head -n1 "$1")
+    local outcome="Killed daemon running as $pid"
+    if ! kill -9 "$pid"
+    then
+        outcome="Error killing daemon running as pid $pid"
+    fi
+    echo "$outcome"
+}
+
+
+# Add a configuration item to a cliapp-style configuration file.
+
+add_to_config_file()
+{
+ if [ ! -e "$1" ]
+ then
+ printf '[config]\n' > "$1"
+ fi
+ printf '%s = %s\n' "$2" "$3" >> "$1"
+}
+
+
+# Ensure the standard input is empty. If not, exit with an error.
+# Input made up only of empty lines counts as empty.
+stdin_is_empty()
+{
+    # grep fails unless some input line has at least one character.
+    grep . > /dev/null || return 0
+
+    echo "ERROR: stdin was NOT empty" 1>&2
+    exit 1
+}