diff --git a/.gitignore b/.gitignore index f89ab74..a3ef85e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,8 @@ npm-debug.log clar.suite clar.cache ssmtest + +# Sublime Text 2 files + +*.sublime-project +*.sublime-workspace diff --git a/README.md b/README.md index 77167c8..be1f886 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,11 @@ playing with duplo blocks. cat theta.json | ./simplex -M 10000 | ./ksimplex -M 10000 > mle.json cat mle.json | ./kmcmc -M 100000 | ./pmcmc -J 1000 -M 500000 --trace > yeaaah.json -[![NPM](https://nodei.co/npm/ssm.png)](https://nodei.co/npm/ssm/) - Maths, methods and algorithms ============================= For more details on the modeling framework and on the algorithms -available in SSM, see the [documentation](https://github.com/standard-analytics/ssm/raw/master/doc/doc.pdf). +available in SSM, see the [documentation](https://github.com/JDureau/ssm/raw/master/doc/doc.pdf). Installation @@ -53,46 +51,48 @@ On OSX with [homebrew](http://brew.sh/) and [pip](https://pypi.python.org/pypi/p brew install jansson zmq gsl node sudo pip install jinja2 sympy python-dateutil - ## Installing S|S|M itself -With [npm](https://npmjs.org/) +Download the library from [this link](http://5.135.176.187:3000/). - npm install -g ssm +Unzip it, and run the following in the terminal: + + cd ssm + npm install -g -Note: requires that all the C and python dependencies have been -installed _before_ as this will also build the standalone C libraries. +Note: this step requires that all the C and Python dependencies have been +installed _before_, as it also builds the standalone C libraries. We recommend _not_ using ```sudo``` for this command. If (and only if) you _have to_ use ```sudo``` to install packages globally (```-g```), then proceed differently: - git clone https://github.com/standard-analytics/ssm.git + git clone https://github.com/JDureau/ssm.git cd ssm - npm install - sudo npm link - + npm install -g Pull requests are welcome for a .gyp file and Windows support! -We also recomend that you install [jsontool](http://trentm.com/json/) - npm install -g jsontool +## Last check +To make sure that everything is properly installed and ssm runs smoothly, +try the following: -Tests -===== + cd ssm/examples/tutorial + ssm + +If the model compiles successfully, you're good to go! - npm test -Notes: -The C code is tested with [clar](https://github.com/vmg/clar) (shipped with this package) +## Any problem with the install? +Check out the _Issues_ section of this repository (top right of this webpage), and if no existing issue solves your problem, post a new one describing it. Usage ===== -What follows use [this example](https://github.com/standard-analytics/ssm/tree/master/examples/tutorial). +What follows uses [this example](https://github.com/JDureau/ssm/tree/master/examples/tutorial). All the paths will be relative to this directory.
## Data and parameters (priors) @@ -118,15 +118,16 @@ Parameters (priors) have to be specified in [JSON](http://json.org/) or [JSON-LD $ cat data/pr_v.json - { - "name": "normal", - "distributionParameter" : [ - { "name" : "mean", "value" : 12.5, "unitCode": "DAY" }, - { "name" : "sd", "value" : 3.8265, "unitCode": "DAY" }, - { "name" : "lower", "value" : 0, "unitCode": "DAY" } - ] - } - +```json +{ + "name": "normal", + "distributionParameter" : [ + { "name" : "mean", "value" : 12.5, "unitCode": "DAY" }, + { "name" : "sd", "value" : 3.8265, "unitCode": "DAY" }, + { "name" : "lower", "value" : 0, "unitCode": "DAY" } + ] +} +``` ## Model @@ -139,7 +140,7 @@ combination thereof. The syntax to define a model is fully described as JSON [schema](http://json-schema.org/) -[here](https://raw.github.com/standard-analytics/ssm/master/json-schema/model-schema.json). +[here](https://raw.github.com/JDureau/ssm/master/json-schema/model-schema.json). ### Link to the data @@ -147,13 +148,14 @@ The first thing to do when writing a model is to _link_ it to the data it explains. $ cat ssm.json | json data - - "data": [ - { - "name": "cases", - "require": { "path": "data/data.csv", "fields": ["date", "cases"] }, - } - ] +```json +"data": [ + { + "name": "cases", + "require": { "path": "data/data.csv", "fields": ["date", "cases"] } + } +] +``` The ```data.require``` property is a link pointing to a time-series. A link is an object with 3 properties: @@ -173,41 +175,42 @@ The same link objects are used to point to the resources that will be used as priors or covariates of the model. $ cat ssm.json | json inputs - - "inputs": [ - { - "name": "r0", - "description": "Basic reproduction number", - "require": { "name": "r0", "path": "data/r0.json" } - }, - { - "name": "v", - "description": "Recovery rate", - "require": { "name": "pr_v", "path": "data/pr_v.json" }, - "transformation": "1/pr_v", - "to_resource": "1/v" - }, - { - "name": "S", - "description": "Number of susceptible", - "require": { "name": "S", "path": "data/S.json" } - }, - { - "name": "I", - "description": "Number of infectious", - "require": { "name": "I", "path": "data/I.json" } - }, - { - "name": "R", - "description": "Number of recovered", - "require": { "name": "R", "path": "data/R.json" } - }, - { - "name": "rep", - "description": "Reporting rate", - "require": { "name": "rep", "path": "data/rep.json" } - } - ], +```json +"inputs": [ + { + "name": "r0", + "description": "Basic reproduction number", + "require": { "name": "r0", "path": "data/r0.json" } + }, + { + "name": "v", + "description": "Recovery rate", + "require": { "name": "pr_v", "path": "data/pr_v.json" }, + "transformation": "1/pr_v", + "to_resource": "1/v" + }, + { + "name": "S", + "description": "Number of susceptible", + "require": { "name": "S", "path": "data/S.json" } + }, + { + "name": "I", + "description": "Number of infectious", + "require": { "name": "I", "path": "data/I.json" } + }, + { + "name": "R", + "description": "Number of recovered", + "require": { "name": "R", "path": "data/R.json" } + }, + { + "name": "rep", + "description": "Reporting rate", + "require": { "name": "rep", "path": "data/rep.json" } + } +], +``` Note that this linking stage also allows you to include some _transformations_ so that a relation can be established between your @@ -227,30 +230,30 @@ contains the following properties: the populations $ cat ssm.json | json populations - - "populations": [ - {"name": "NYC", "composition": ["S", "I", "R"]} - ] - +```json +"populations": [ + {"name": "NYC",
"composition": ["S", "I", "R"]} +] +``` and the reactions, defining the process model $ cat ssm.json | json reactions - - "reactions": [ - {"from": "S", "to": "I", "rate": "r0/(S+I+R)*v*I", "description": "infection", "accumulators": ["Inc"]}, - {"from": "I", "to": "R", "rate": "v", "description":"recovery"} - ] - +```json +"reactions": [ + {"from": "S", "to": "I", "rate": "r0/(S+I+R)*v*I", "description": "infection", "accumulators": ["Inc"]}, + {"from": "I", "to": "R", "rate": "v", "description":"recovery"} +] +``` Note that the populations object is a list. Structured populatiols can be defined by appending terms to the list. An ```sde``` property can be added in case you want that some parameters follow diffusions (see -[here](https://github.com/standard-analytics/ssm/blob/master/examples/foo/package.json) +[here](https://github.com/JDureau/ssm/blob/master/examples/foo/package.json) for an example, and [here](http://arxiv.org/abs/1203.5950) for references). White environmental noise can also be added to the reaction -as in this [example](https://raw.github.com/standard-analytics/ssm/master/examples/noise/package.json) +as in this [example](https://raw.github.com/JDureau/ssm/master/examples/noise/package.json) (references [here](http://arxiv.org/abs/0802.0021)). The ```accumulators``` property allows to defined new state variable @@ -263,17 +266,18 @@ point related to the accumulator state. One observation model has to be defined per observed time-series. $ cat ssm.json | json observations - - "observations": [ - { - "name": "cases", - "start": "2012-07-26", - "distribution": "discretized_normal", - "mean": "rep * Inc", - "sd": "sqrt(rep * ( 1.0 - rep ) * Inc )" - } - ] - +```json +"observations": [ + { + "name": "cases", + "start": "2012-07-26", + "distribution": "discretized_normal", + "mean": "rep * Inc", + "sd": "sqrt(rep * ( 1.0 - rep ) * Inc )" + } +] +``` +SSM currently implements the `discretized_normal`, `poisson` and `binomial`observation processes. See the developer [documentation](doc/developers/dev_help.md) to contribute and add your favourite distribution. ### Initial conditions @@ -283,26 +287,26 @@ them need need to be defined in a separate JSON file typicaly named inference algorithms: $ cat theta.json - - "resources": [ - { - "name": "values", - "description": "initial values for the parameters", - "data": { - "r0": 25.0, - "pr_v": 11.0 - } - }, - { - "name": "covariance", - "description": "covariance matrix", - "data": { - "r0": {"r0": 0.04, "pr_v": 0.01}, - "pr_v": {"pr_v": 0.02, "r0": 0.01} - } - } - ] - +```json +"resources": [ + { + "name": "values", + "description": "initial values for the parameters", + "data": { + "r0": 25.0, + "pr_v": 11.0 + } + }, + { + "name": "covariance", + "description": "covariance matrix", + "data": { + "r0": {"r0": 0.04, "pr_v": 0.01}, + "pr_v": {"pr_v": 0.02, "r0": 0.01} + } + } +] +``` Only the diagonal terms are mandatory for the covariance matrix. 
@@ -342,8 +346,10 @@ Let's start by plotting the data with [R](http://www.r-project.org/): - data <- read.csv('../data/data.csv', na.strings='null') - plot(as.Date(data$date), data$cases, type='s') +```R +data <- read.csv('../data/data.csv', na.strings='null') +plot(as.Date(data$date), data$cases, type='s') +``` Let's run a first simulation: @@ -351,8 +357,10 @@ Let's run a first simulation: And add the simulated trajectory to our first plot - traj <- read.csv('X_0.csv') - lines(as.Date(traj$date), traj$cases, type='s', col='red') +```R +traj <- read.csv('X_0.csv') +lines(as.Date(traj$date), traj$cases, type='s', col='red') +``` Let's infer the parameters to get a better fit @@ -361,23 +369,27 @@ Let's infer the parameters to get a better fit let's read the values found: $ cat mle.json | json resources | json -c "this.name=='values'" - [ - { - "name": "values", - "data": { - "pr_v": 19.379285906561037, - "r0": 29.528755614881494 - } - } - ] +```json +[ + { + "name": "values", + "data": { + "pr_v": 19.379285906561037, + "r0": 29.528755614881494 + } + } +] +``` Let's plot the evolution of the parameters: - trace <- read.csv('trace_0.csv') - layout(matrix(1:3,1,3)) - plot(trace$index, trace$r0, type='l') - plot(trace$index, trace$pr_v, type='l') - plot(trace$index, trace$fitness, type='l') +```R +trace <- read.csv('trace_0.csv') +layout(matrix(1:3,1,3)) +plot(trace$index, trace$r0, type='l') +plot(trace$index, trace$pr_v, type='l') +plot(trace$index, trace$fitness, type='l') +``` Now let's redo a simulation with these values (```mle.json```): @@ -385,63 +397,72 @@ Now let's redo a simulation with these values (```mle.json```): and replot the results: - plot(as.Date(data$date), data$cases, type='s') - traj <- read.csv('X_0.csv') - lines(as.Date(traj$date), traj$cases, type='s', col='red') +```R +plot(as.Date(data$date), data$cases, type='s') +traj <- read.csv('X_0.csv') +lines(as.Date(traj$date), traj$cases, type='s', col='red') +``` to realize that the fit is now much better. And now in one line: $ cat theta.json | ./simplex -M 10000 --trace | ./simul --traj | json resources | json -c "this.name=='values'" - [ - { - "name": "values", - "data": { - "r0": 29.528755614881494, - "pr_v": 19.379285906561037 - } - } - ] - +```json +[ + { + "name": "values", + "data": { + "r0": 29.528755614881494, + "pr_v": 19.379285906561037 + } + } +] +``` + Let's get some posteriors and sample some trajectories by adding a pmcmc at the end of our pipeline (we actually add 2 of them to skip the convergence of the mcmc algorithm).
$ cat theta.json | ./simplex -M 10000 | ./pmcmc -M 10000 | ./pmcmc -M 100000 --trace --traj | json resources | json -c 'this.name=="summary"' - - [ - { - "name": "summary", - "data": { - "id": 0, - "log_ltp": -186.70579009197556, - "AICc": 363.94320971360844, - "n_parameters": 2, - "AIC": 363.6765430469418, - "DIC": 363.6802334782078, - "log_likelihood": -179.8382715234709, - "sum_squares": null, - "n_data": 48 - } - } - ] +```json +[ + { + "name": "summary", + "data": { + "id": 0, + "log_ltp": -186.70579009197556, + "AICc": 363.94320971360844, + "n_parameters": 2, + "AIC": 363.6765430469418, + "DIC": 363.6802334782078, + "log_likelihood": -179.8382715234709, + "sum_squares": null, + "n_data": 48 + } + } +] +``` Some posterior plots (still with R) - trace <- read.csv('trace_0.csv') - layout(matrix(1:2,1,2)) - hist(trace$r0) - hist(trace$pr_v) +```R + trace <- read.csv('trace_0.csv') + layout(matrix(1:2,1,2)) + hist(trace$r0) + hist(trace$pr_v) +``` The sampled trajectories - traj <- read.csv('X_0.csv') - plot(as.Date(data$date), data$cases, type='s') - samples <- unique(traj$index) - for(i in samples){ - lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') - } +```R + traj <- read.csv('X_0.csv') + plot(as.Date(data$date), data$cases, type='s') + samples <- unique(traj$index) + for(i in samples){ + lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') + } +``` ## Be cautious @@ -460,19 +481,21 @@ diagnostic outputs. For instance let's run a particle filter with the ```--diag``` option gives us access to the prediction residuals and the effective sample size. Let's plot these quantities - diag <- read.csv('diag_0.csv') - layout(matrix(1:3,3,1)) +```R + diag <- read.csv('diag_0.csv') + layout(matrix(1:3,3,1)) - #data vs prediction - plot(as.Date(data$date), data$cases, type='p') - lines(as.Date(diag$date), diag$pred_cases, type='p', col='red') + #data vs prediction + plot(as.Date(data$date), data$cases, type='p') + lines(as.Date(diag$date), diag$pred_cases, type='p', col='red') - #prediction residuals - plot(as.Date(diag$date), diag$res_cases, type='p') - abline(h=0, lty=2) + #prediction residuals + plot(as.Date(diag$date), diag$res_cases, type='p') + abline(h=0, lty=2) - #effective sample size - plot(as.Date(diag$date), diag$ess, type='s') + #effective sample size + plot(as.Date(diag$date), diag$ess, type='s') +``` ## Parallel computing @@ -486,10 +509,12 @@ envelopes (```--hat```). Let's plot the trajectories - hat <- read.csv('hat_0.csv') - plot(as.Date(hat$date), hat$mean_cases, type='s') - lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2) - lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2) +```R + hat <- read.csv('hat_0.csv') + plot(as.Date(hat$date), hat$mean_cases, type='s') + lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2) + lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2) +``` Your machine is not enough? You can use several. First let's transform our ```smc``` into a _server_ that will dispatch some work to @@ -532,20 +557,21 @@ xlim on our first plot. For the prediction we ran ```simul``` with the ```--hat``` option that will output empirical credible envelopes instead of all the projected trajectories (as does ```--traj```).
- - data <- read.csv('../data/data.csv', na.strings='null') - plot(as.Date(data$date), data$cases, type='s', xlim=c(min(as.Date(data$date)), as.Date('2013-12-25'))) - - traj <- read.csv('X_0.csv') #from the previous run - samples <- unique(traj$index) - for(i in samples){ - lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') - } - - hat <- read.csv('hat_0.csv') #from the current run - lines(as.Date(hat$date), hat$mean_cases, type='s' , col='blue') - lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2, col='blue') - lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2, col='blue') +```R + data <- read.csv('../data/data.csv', na.strings='null') + plot(as.Date(data$date), data$cases, type='s', xlim=c(min(as.Date(data$date)), as.Date('2013-12-25'))) + + traj <- read.csv('X_0.csv') #from the previous run + samples <- unique(traj$index) + for(i in samples){ + lines(as.Date(traj$date[traj$index == i]), traj$cases[traj$index == i], type='s', col='red') + } + + hat <- read.csv('hat_0.csv') #from the current run + lines(as.Date(hat$date), hat$mean_cases, type='s' , col='blue') + lines(as.Date(hat$date), hat$lower_cases, type='s', lty=2, col='blue') + lines(as.Date(hat$date), hat$upper_cases, type='s', lty=2, col='blue') +``` License diff --git a/doc/developers/dev_help.md b/doc/developers/dev_help.md new file mode 100644 index 0000000..6866dfb --- /dev/null +++ b/doc/developers/dev_help.md @@ -0,0 +1,208 @@ +# Help documentation for SSM developers + +This document provides some help for developers who want to contribute to the SSM library by adding new functionalities that require modifying the source code. + +## How to add a new observation process? + +SSM originally implements the `discretized_normal` observation process. Adding a new observation distribution requires modifying four files: + +* `src/C/templates/observed_template.c` +* `lib/validate.js` +* `src/Ccoder.py` +* `src/Cmodel.py` + +Let's illustrate this by adding the `poisson` distribution, which is fully parametrized by its `mean` and would be specified in `ssm.json` as follows: + +```json +"observations" : [ + { + "name" : "incidence_observed", + "start" : "1789-07-14", + "distribution" : "poisson", + "mean" : "reporting_rate * incidence" + } + ] +``` + +where `reporting_rate` is the rate at which new cases are reported and would be defined in the `inputs`. + +### Modify `src/C/templates/observed_template.c` + +This file contains the template for the likelihood (`f_likelihood_tpl_`) and random generator (`f_obs_ran_tpl_`) functions of the observation process. Both need to be modified by adding `elif` clauses to switch between distributions and make calls to suitable functions of the [GSL](https://www.gnu.org/software/gsl/manual/html_node/The-Poisson-Distribution.html) library to perform the core calculation.
+
```C
static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t)
{
    double like;
    double *X = p_X->proj;

    {% if x.distribution == 'discretized_normal' %}

    double gsl_mu = {{ x.mean }};
    double gsl_sd = {{ x.sd }};
    if (y > 0.0) {
        like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd) - gsl_cdf_gaussian_P(y - 0.5 - gsl_mu, gsl_sd);
    } else {
        like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd);
    }

    {% elif x.distribution == 'poisson' %}

    double gsl_mu = {{ x.mean }};
    like = gsl_ran_poisson_pdf(rint(y), gsl_mu);

    {% endif %}

    return like;
}
```
```C
static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t)
{
    double *X = p_X->proj;

    {% if x.distribution == 'discretized_normal' %}

    double gsl_mu = {{ x.mean }};
    double gsl_sd = {{ x.sd }};
    double yobs = gsl_mu + gsl_ran_gaussian(calc->randgsl, gsl_sd);
    yobs = (yobs >0) ? yobs : 0.0;

    {% elif x.distribution == 'poisson' %}

    double gsl_mu = {{ x.mean }};
    double yobs = gsl_ran_poisson(calc->randgsl, gsl_mu);

    {% endif %}

    return yobs;
}
```
### Modify `lib/validate.js`

This file contains the function `module.exports` that checks that the model is semantically complete and correct, and throws error messages otherwise.

```javascript
model.observations.forEach(function(obs){

    if(obs.distribution == 'discretized_normal'){

        parseRate(obs.mean).forEach(function(t){
            if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){
                throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t));
            };
        });

        parseRate(obs.sd).forEach(function(t){
            if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){
                throw new Error(util.format("In 'observations', 'sd' %s, the term %s cannot be understood by SSM. Please define it.",obs.sd,t));
            };
        });

    } else if (obs.distribution == 'poisson'){

        parseRate(obs.mean).forEach(function(t){
            if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){
                throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t));
            };
        });
    };
});

model.observations.forEach(function(obs){

    if( (obs.distribution != 'discretized_normal') && (obs.distribution != 'poisson')){
        throw new Error("For the moment, SSM only supports 'discretized_normal' and 'poisson' distributions for observations.");
    }
});
```

### Modify `src/Ccoder.py`

This file contains the method `observed`, which defines all the parameters needed by SSM to handle the distribution. In particular, several algorithms in SSM perform Gaussian approximations, which require us to define the `mean` and `sd` (standard deviation) parameters.

```python
def observed(self):

    obs = copy.deepcopy(self.obs_model)

    for x in obs:
        if x['distribution'] == 'discretized_normal':
            x['mean'] = self.make_C_term(x['mean'], True)
            x['sd'] = self.make_C_term(x['sd'], True)
        elif x['distribution'] == 'poisson':
            x['mean'] = self.make_C_term(x['mean'], True)
            x['sd'] = self.make_C_term(x['sd'], True)

    return {'observed': obs}
```
Note that all the parameters of the distribution need to appear in addition to `mean` and `sd`. For instance, if one wants to add a binomial distribution parametrized by its probability `p` and sample size `n`, these two parameters must also be defined here.
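As a concrete illustration (hypothetical, not part of the current code base), a `binomial` branch parametrized by `p` and `n` could extend `observed` in the same way. The sketch below assumes that the Gaussian-approximation `mean` and `sd` have already been derived from `p` and `n` upstream (in `src/Cmodel.py`, as is done for `poisson` in the next section):

```python
elif x['distribution'] == 'binomial':
    # mean and sd of the Gaussian approximation (mean = n*p,
    # sd = sqrt(n*p*(1-p))), assumed to be attached to x upstream
    x['mean'] = self.make_C_term(x['mean'], True)
    x['sd'] = self.make_C_term(x['sd'], True)
    # the distribution-specific parameters must be converted to C as well
    x['p'] = self.make_C_term(x['p'], True)
    x['n'] = self.make_C_term(x['n'], True)
```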
### Modify `src/Cmodel.py`

The appropriate `mean` and `sd` of the Gaussian approximation must be defined in this file. We can use the fact that when `mean > 1000`, the Poisson distribution can be approximated by a normal distribution with the same `mean` and `sd = sqrt(mean)`. Thus, we only need to compute the standard deviation:

```python
for o in observations:
    if o['distribution'] == 'discretized_normal':
        pars = [o['mean'], o['sd']]
    elif o['distribution'] == 'poisson':
        o['sd'] = 'sqrt('+o['mean']+')'
        pars = [o['mean'], o['sd']]

    for p in pars:
        el = self.change_user_input(p)
        for e in el:
            if e not in self.op and e not in self.reserved and e not in self.special_functions and e not in self.par_sv and e not in self.par_noise and e not in self.par_proc and e not in self.par_forced and e not in self.par_inc:
                try:
                    float(e)
                except ValueError:
                    par_obs.add(e)

    self.par_obs = sorted(list(par_obs))
```

### Modify `json-schema/model-schema.json`

Finally, if the distribution requires parameter names other than `mean` and `sd`, you need to specify these in the `observations` field of the schema file. Since the Poisson distribution is parameterized by the `mean`, there is no need to edit the file. However, for a binomial distribution with parameters `p` and `n` this would become:

```json
 "observations": {
    "type": "array",
    "description": "Name each of the observed time series, determine when observations started to be collected and what is the distribution of the observation process.",
    "items": {
        "type": "object",
        "properties": {
            "name": {
                "type": "string"
            },
            "start": {
                "type": "string",
                "format": "date-time"
            },
            "distribution": {
                "type": "string"
            },
            "mean": {
                "type": "string"
            },
            "sd": {
                "type": "string"
            },
            "n": {
                "type": "string"
            },
            "p": {
                "type": "string"
            }
        },
        "required": [
            "name",
            "start",
            "distribution"
        ]
    }
 }
```

diff --git a/doc/doc.tex b/doc/doc.tex index 23747c0..0b62f18 100644 --- a/doc/doc.tex +++ b/doc/doc.tex @@ -392,7 +392,7 @@ \subsubsection{\label{subsub:sde}Stochastic differential equations} {2pt} ][c]{rCl} dS_t &= &-\beta S_t \frac{I_t}{N} dt - \sigma \beta S_t \frac{I_t}{N} dB^{(1)}_t \\ -dI_t &=& (\beta S_t \frac{I_t}{N} d\Gamma_t -\gamma I_t)dt + \sigma \beta S_t \frac{I_t}{N} dB^{(1)}_t\\ +dI_t &=& (\beta S_t \frac{I_t}{N} - \gamma I_t)dt + \sigma \beta S_t \frac{I_t}{N} dB^{(1)}_t\\ dR_t &=& \gamma I_t dt \end{IEEEeqnarraybox} \right.
diff --git a/examples/tutorial/ssm.json b/examples/tutorial/ssm.json index e9f0b4c..654a940 100644 --- a/examples/tutorial/ssm.json +++ b/examples/tutorial/ssm.json @@ -1,43 +1,43 @@ { "data": [ - { - "name": "cases", + { + "name": "cases", "require": { "path": "data/data.csv", "fields": ["date", "cases"] } } ], - + "inputs": [ { - "name": "r0", - "description": "Basic reproduction number", - "require": { "name": "r0", "path": "data/r0.json" } + "name": "r0", + "description": "Basic reproduction number", + "require": { "name": "r0", "path": "data/r0.json" } }, - { + { "name": "v", "description": "Recovery rate", "require": { "name": "pr_v", "path": "data/pr_v.json" }, "transformation": "1/pr_v", - "to_resource": "1/v" + "to_resource": "1/v" }, { - "name": "S", + "name": "S", "description": "Number of susceptible", - "require": { "name": "S", "path": "data/S.json" } + "require": { "name": "S", "path": "data/S.json" } }, - { + { "name": "I", - "description": "Number of infectious", - "require": { "name": "I", "path": "data/I.json" } + "description": "Number of infectious", + "require": { "name": "I", "path": "data/I.json" } }, - { - "name": "R", + { + "name": "R", "description": "Number of recovered", - "require": { "name": "R", "path": "data/R.json" } + "require": { "name": "R", "path": "data/R.json" } }, - { + { "name": "rep", "description": "Reporting rate", - "require": { "name": "rep", "path": "data/rep.json" } + "require": { "name": "rep", "path": "data/rep.json" } } ], diff --git a/json-schema/model-schema.json b/json-schema/model-schema.json index d959d35..3719ed8 100644 --- a/json-schema/model-schema.json +++ b/json-schema/model-schema.json @@ -1,10 +1,7 @@ { "$schema": "http://json-schema.org/draft-04/schema#", - "title": "SSM model", - - "description": "Description of an state space model following the grammar of the SSM inference package", - + "description": "Description of a state space model following the grammar of the SSM inference package", "definitions": { "require": { "type": "object", @@ -12,252 +9,291 @@ "datapackage": { "type": "string" }, - "resource": { - "type": "string" - }, - "fields": { - "type": "array", - "items": { - "type": "string" - } - } + "resource": { + "type": "string" + }, + "fields": { + "type": "array", + "items": { + "type": "string" + } + } } } - }, - - + }, "type": "object", - "properties": { - "populations": { "type": "array", "description": "Grouping of the state variables comprised in your compartmental model. For each group, provide a name and the list of corresponding states. By adding an optional remainder field, you will ensure the population size remains positive.", "items": { "type": "object", "properties": { - "name": { - "type": "string" - }, - "composition": { - "type": "array", - "items": { - "type": "string" - } - }, - "remainder": { - "type": "object", - "description": "Following this instruction, the value of the remainder variable at every time will be determined by the size of the population minus the sum of the other compartments. 
Note that SSM will discard scenarios where this variable becomes negative.", - "properties": { - "name": { - "type": "string" - }, - "pop_size": { - "type": "string" - } - }, - "required": ["name", "pop_size"] - } - }, - "required": ["name", "composition"] - } + "name": { + "type": "string" + }, + "composition": { + "type": "array", + "items": { + "type": "string" + } + }, + "remainder": { + "type": "object", + "description": "Following this instruction, the value of the remainder variable at every time will be determined by the size of the population minus the sum of the other compartments. Note that SSM will discard scenarios where this variable becomes negative.", + "properties": { + "name": { + "type": "string" + }, + "pop_size": { + "type": "string" + } + }, + "required": [ + "name", + "pop_size" + ] + } + }, + "required": [ + "name", + "composition" + ] + } }, - "reactions": { "type": "array", "description": "Description of the dynamics of your compartmental model. We only consider density-dependent reactions: each rate will be multiplied by the size of the compartment individuals are leaving. See the accumulators and white_noise optional fields for additional options.", "items": { "type": "object", - "properties": { - "from": { - "type": "string" - }, - "to": { - "type": "string" - }, - "rate": { - "type": "string" - }, - "description": { - "type": "string" - }, - "white_noise": { - "type": "object", - "description": "In order to cope with the absence or mis-specification of environmental factors in the model, you can multiply the rate of each reaction by a white gamma noise. To correlate these noises, use the same name. For more information, see Breto et al. 2009, Time series analysis via mechanistic models.", - "properties": { - "name": { - "type": "string" - }, - "sd": { - "type": "string" - } - }, - "required": ["name","sd"] - }, - "accumulators": { - "type": "array", - "description": "When your are monitoring or fitting the integrated flow of a given reaction, you can store it in one or several variables defined in this list. If a given flow variable is repeated in several reactions, it will correspond of the sum of the flows of these reactions.", - "items": { - "type": "string" - } - }, + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + }, + "rate": { + "type": "string" + }, + "description": { + "type": "string" + }, + "white_noise": { + "type": "object", + "description": "In order to cope with the absence or mis-specification of environmental factors in the model, you can multiply the rate of each reaction by a white gamma noise. To correlate these noises, use the same name. For more information, see Breto et al. 2009, Time series analysis via mechanistic models.", + "properties": { + "name": { + "type": "string" + }, + "sd": { + "type": "string" + } + }, + "required": [ + "name", + "sd" + ] + }, + "accumulators": { + "type": "array", + "description": "When you are monitoring or fitting the integrated flow of a given reaction, you can store it in one or several variables defined in this list.
If a given flow variable is repeated in several reactions, it will correspond to the sum of the flows of these reactions.", + "items": { + "type": "string" + } + }, "keywords": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": ["from", "to","rate"] + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "from", + "to", + "rate" + ] } }, - "sde": { "type": "object", "description": "System of stochastic differential equations, defined by their drift vector and dispersion matrix", "properties": { "drift": { "type": "array", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "State variable whose dynamics are determined by this line of the system." }, "f": { "type": [ "number", "string" ], "description": "Deterministic component of the equation." }, "transformation": { "type": "string", "description": "In case you want to express the equation in terms of a function of X and avoid applying the Ito formula." } }, "required": [ "name", "f" ] } }, "dispersion": { "type": "array", "description": "Dispersion matrix L. It needs to have as many rows as objects in drifts, and as many columns as independent sources of noise.", "items": { "type": "array", "items": { "type": [ "number", "string" ] } } } }, "required": [ "drift", "dispersion" ] }, "ode": { "type": "array", "description": "System of ordinary differential equations.", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "State variable whose dynamics are determined by this line of the system." }, "f": { "type": [ "number", "string" ], "description": "Deterministic component of the equation." }, "transformation": { "type": "string", "description": "In case you want to express the equation in terms of a function of X and avoid applying the Ito formula."
+ } + }, + "required": [ + "name", + "f" + ] } }, - - "observations": { "type": "array", "description": "Name each of the observed time series, determine when observations started to be collected and what is the distribution of the observation process.", "items": { "type": "object", - "properties": { - "name": { - "type": "string" - }, - "start": { - "type": "string", - "format": "date-time" - }, - "distribution": { - "type": "string" - }, - "mean": { - "type": "string" - }, - "sd": { - "type": "string" - } - }, - "required": ["name", "start","distribution","mean","sd"] + "properties": { + "name": { + "type": "string" + }, + "start": { + "type": "string", + "format": "date-time" + }, + "distribution": { + "type": "string" + }, + "mean": { + "type": "string" + }, + "sd": { + "type": "string" + }, + "n": { + "type": "string" + }, + "p": { + "type": "string" + } + }, + "required": [ + "name", + "start", + "distribution" + ] } }, - "data": { "type": "array", "description": "Link each of the observed variables to a data resource.", "items": { - "type": "object", - "anyOf": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "require": {"$ref": "#/definitions/require"}, - "transformation": { - "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", - "type": "string" - } - }, - "required": ["name", "require"] - } + "type": "object", + "anyOf": [ + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "require": { + "$ref": "#/definitions/require" + }, + "transformation": { + "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", + "type": "string" + } + }, + "required": [ + "name", + "require" + ] + } ] } }, - "inputs": { "type": "array", "description": "Link each of the parameters or covariates at stake in your model to a data resource.", "items": { - "type": "object", - "anyOf": [ - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "require": {"$ref": "#/definitions/require"}, - "transformation": { - "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", - "type": "string" - }, - "to_resource": { - "description": "In order to make predictions after fitting your data, specify how to invert the transformation relation at a later time than t0.", - "type": "string" - } - }, - "required": ["name"] - } - ] - } + "type": "object", + "anyOf": [ + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "require": { + "$ref": "#/definitions/require" + }, + "transformation": { + "description": "When the parameters used in the model are functions of the data resources, specify the relation here.", + "type": "string" + }, + "to_resource": { + "description": "In order to make predictions after fitting your data, specify how to invert the transformation relation at a later time than t0.", + "type": "string" + } + }, + "required": [ + "name" + ] + } + ] + } } } -} +} \ No newline at end of file diff --git a/lib/install.js b/lib/install.js index 871d875..573e915 100644 --- a/lib/install.js +++ b/lib/install.js @@ -28,6 +28,11 @@ module.exports = function(dpkgRoot, dpkg, pathModel, keepSources, emitter, callb "import os", "import sys", "import json", 
+ "class SsmError(Exception):", + "\tdef __init__(self, value):", + "\t\tself.value = value", + "\tdef __str__(self):", + "\t\treturn repr(self.value)", "sys.path.append('" + path.resolve(__dirname, '..', 'src') + "')", "from Builder import Builder", "path_model_coded = '" + pathModel + "'", @@ -37,8 +42,8 @@ module.exports = function(dpkgRoot, dpkg, pathModel, keepSources, emitter, callb "\tb.prepare()", "\tb.code()", "\tb.write_data()", - "except:", - "\tsys.exit(1)" + "except SsmError as err:", + "\tsys.exit(1)"//, ].join('\n'); var templater = spawn('python', ['-c', tplter]); @@ -53,6 +58,8 @@ module.exports = function(dpkgRoot, dpkg, pathModel, keepSources, emitter, callb templater.on('exit', function (code) { + // console.log('CODE', code); + if(code === 0) { var make = spawn('make', ['install'], {cwd: path.join(pathModel, 'C', 'templates')}); make.stdout.setEncoding('utf8'); diff --git a/lib/validate.js b/lib/validate.js index 3b21b09..48d49b4 100644 --- a/lib/validate.js +++ b/lib/validate.js @@ -1,18 +1,18 @@ var fs = require('fs') - , tv4 = require("tv4") - , _ = require('underscore') - , util = require('util'); +, tv4 = require("tv4") +, _ = require('underscore') +, util = require('util'); var op = [ '+', '-', '*', '/', ',', '(', ')' ] - , modelSchema = require('../json-schema/model-schema.json') - , funcsSSM = ['heaviside', 'ramp', 'correct_rate'] - , jsmaths = Object.getOwnPropertyNames(Math); +, modelSchema = require('../json-schema/model-schema.json') +, funcsSSM = ['heaviside', 'ramp', 'correct_rate', 'sigmoid'] +, jsmaths = Object.getOwnPropertyNames(Math); function parseRate(rate){ rate = rate.replace(/\s+/g, ''); var s = '' - , l = []; + , l = []; for (var i = 0; i< rate.length; i++){ if (op.indexOf(rate[i]) !== -1){ @@ -36,7 +36,7 @@ function parseRate(rate){ /** * check that the model is semantically complete and correct */ -module.exports = function(model){ + module.exports = function(model){ if( !tv4.validate(model, modelSchema) ){ // check that model complies with corresponding schema-json @@ -75,7 +75,7 @@ module.exports = function(model){ } }); incidences = _.uniq(incidences); - + // no ode for the moment if('ode' in model){ throw new Error("For the moment, SSM does not support ode's. This will come sortly."); @@ -121,10 +121,10 @@ module.exports = function(model){ model.reactions.forEach(function(r){ if( (remainders.indexOf(r.from) != -1) && ('accumulators' in r) ){ if( r.accumulators.length > 0){ - throw new Error(util.format("The incidence variable %s is ill-defined. As %s has been defined as a remainder state variable, reactions leaving from it will be ignored.",_.first(r.accumulators),r.from)); - } - } - }); + throw new Error(util.format("The incidence variable %s is ill-defined. As %s has been defined as a remainder state variable, reactions leaving from it will be ignored.",_.first(r.accumulators),r.from)); + } + } + }); // all t0's are the same. var t0s = model.observations.map(function(o){return o.start;}); @@ -153,7 +153,7 @@ module.exports = function(model){ } } }); - + // observations exactly map to data if (!_.isEqual(observations,data)){ @@ -162,69 +162,100 @@ module.exports = function(model){ // Observed variables either correspond to state variables or accumulators flows. 
var allowedTerms = op.concat(parameters, funcsSSM, incidences, jsmaths, 't'); //terms allowed in rates of the process model + model.observations.forEach(function(obs){ - parseRate(obs.mean).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); - }; - }); - parseRate(obs.sd).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In 'observations', 'sd' %s, the term %s cannot be understood by SSM. Please define it.",obs.sd,t)); - }; - }); + + if(obs.distribution == 'discretized_normal'){ + + parseRate(obs.mean).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); + }; + }); + + parseRate(obs.sd).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'sd' %s, the term %s cannot be understood by SSM. Please define it.",obs.sd,t)); + }; + }); + + } else if (obs.distribution == 'poisson'){ + + parseRate(obs.mean).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'mean' %s, the term %s cannot be understood by SSM. Please define it.",obs.mean,t)); + }; + + }); + + } else if (obs.distribution == 'binomial'){ + + + parseRate(obs.p).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'p' %s, the term %s cannot be understood by SSM. Please define it.",obs.p,t)); + }; + }); + + parseRate(obs.n).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In 'observations', 'n' %s, the term %s cannot be understood by SSM. Please define it.",obs.n,t)); + }; + }); + + }; + }); - - // SSM only supports discretised_normal observation distribution so far + + // SSM only supports discretised_normal, poisson and binomial observation distributions so far model.observations.forEach(function(obs){ - if(obs.distribution != 'discretized_normal'){ - throw new Error("For the moment, SSM only supports 'discretized_normal' distributions for observations."); + if( (obs.distribution != 'discretized_normal') && (obs.distribution != 'poisson') && (obs.distribution != 'binomial')){ + throw new Error("For the moment, SSM only supports 'discretized_normal', 'poisson' and 'binomial' distributions for observations."); } }); - - + if('sde' in model){ // Zero SDE drifts model.sde.drift.forEach(function(d){ if( d.f != 0.0 ){ - throw new Error("For the moment, SSM does not support non-zero drifts in SDE's."); - } - }); + throw new Error("For the moment, SSM does not support non-zero drifts in SDE's."); + } + }); // SDE transformations are understood by SSM allowedTerms = op.concat(parameters, funcsSSM, jsmaths,'t'); //terms allowed in rates of the process model model.sde.drift.forEach(function(r){ if ('transformation' in r){ - parseRate(r.transformation).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In SDE's, 'transformation' %s, the term %s cannot be understood by SSM.
Please define it.",r.transformation, t)); - } - }); - } - }); + parseRate(r.transformation).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In SDE's, 'transformation' %s, the term %s cannot be understood by SSM. Please define it.",r.transformation, t)); + } + }); + } + }); // SDE dispersions are understood by SSM model.sde.dispersion.forEach(function(tmp){ tmp.forEach(function(r){ - if (isNaN(parseFloat(r))){ - parseRate(r).forEach(function(t){ - if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ - throw new Error(util.format("In SDE's, the dispersion term %s cannot be understood by SSM. Please define it.",t)); - } - }); - } - }); + if (isNaN(parseFloat(r))){ + parseRate(r).forEach(function(t){ + if( (allowedTerms.indexOf(t) == -1) && (isNaN(parseFloat(t))) ){ + throw new Error(util.format("In SDE's, the dispersion term %s cannot be understood by SSM. Please define it.",t)); + } + }); + } + }); }); // Variables on which SDE's are defined must belong to parameters, but cannot be state variables. model.sde.drift.forEach(function(d){ if ( parameters.indexOf(d.name) == -1 ){ - throw new Error(util.format("The variable %s on which you define an SDE is not defined in 'inputs'. Please define it.",d.name)); - } - if ( (stateVariables.indexOf(d.name) != -1) || (popSizes.indexOf(d.name) != -1) ){ - throw new Error(util.format("SDE's cannot be defined on a state variable or population size (%s).",d.name)); - } - }); + throw new Error(util.format("The variable %s on which you define an SDE is not defined in 'inputs'. Please define it.",d.name)); + } + if ( (stateVariables.indexOf(d.name) != -1) || (popSizes.indexOf(d.name) != -1) ){ + throw new Error(util.format("SDE's cannot be defined on a state variable or population size (%s).",d.name)); + } + }); } }; diff --git a/src/C/core/bayes.c b/src/C/core/bayes.c index aa4e341..c1d9988 100644 --- a/src/C/core/bayes.c +++ b/src/C/core/bayes.c @@ -21,8 +21,8 @@ /** * compute the log of the prob of the proposal */ -ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed, ssm_theta_t *theta, ssm_var_t *var, double sd_fac, ssm_nav_t *nav, int is_mvn) -{ + ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed, ssm_theta_t *theta, ssm_var_t *var, double sd_fac, ssm_nav_t *nav, int is_mvn) + { int i; ssm_parameter_t *p; @@ -59,10 +59,10 @@ ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed diagonal terms so everything generalizes nicely */ - p_tmp /= p->f_der_inv(gsl_vector_get(proposed, p->offset_theta)); + p_tmp /= p->f_der_inv(gsl_vector_get(proposed, p->offset_theta)); //check for numerical issues - if( (isnan(p_tmp)==1) || (isinf(p_tmp)==1) || (p_tmp<0.0) ) { + if( (isnan(p_tmp)==1) || (isinf(p_tmp)==1) || (p_tmp<0.0) ) { return SSM_ERR_PROPOSAL; } @@ -92,8 +92,8 @@ ssm_err_code_t ssm_log_prob_proposal(double *log_proposal, ssm_theta_t *proposed * means it doesn't immediatly return on failure). This is usefull for * the --prior option. 
*/ -ssm_err_code_t ssm_log_prob_prior(double *log_prior, ssm_theta_t *theta, ssm_nav_t *nav, ssm_fitness_t *fitness) -{ + ssm_err_code_t ssm_log_prob_prior(double *log_prior, ssm_theta_t *theta, ssm_nav_t *nav, ssm_fitness_t *fitness) + { int i; ssm_parameter_t *p; int is_err = 0; @@ -129,8 +129,8 @@ ssm_err_code_t ssm_log_prob_prior(double *log_prior, ssm_theta_t *theta, ssm_nav * return accepted (SSM_SUCCESS) or rejected (SSM_MH_REJECTED) or * combination of prob errors and assign fitness->log_prior */ -ssm_err_code_t ssm_metropolis_hastings(ssm_fitness_t *fitness, double *alpha, ssm_theta_t *proposed, ssm_theta_t *theta, gsl_matrix *var, double sd_fac , ssm_nav_t *nav, ssm_calc_t *calc, int is_mvn) -{ + ssm_err_code_t ssm_metropolis_hastings(ssm_fitness_t *fitness, double *alpha, ssm_theta_t *proposed, ssm_theta_t *theta, gsl_matrix *var, double sd_fac , ssm_nav_t *nav, ssm_calc_t *calc, int is_mvn) + { double ran; ssm_err_code_t success = SSM_SUCCESS; double lproposal, lproposal_prev, lprior_prev; @@ -162,8 +162,8 @@ ssm_err_code_t ssm_metropolis_hastings(ssm_fitness_t *fitness, double *alpha, ss * (depending on the iteration value and options) and the evaluated * tuning factor sd_fac */ -ssm_var_t *ssm_adapt_eps_var_sd_fac(double *sd_fac, ssm_adapt_t *a, ssm_var_t *var, ssm_nav_t *nav, int m) -{ + ssm_var_t *ssm_adapt_eps_var_sd_fac(double *sd_fac, ssm_adapt_t *a, ssm_var_t *var, ssm_nav_t *nav, int m) + { // evaluate epsilon(m) = epsilon(m-1) * exp(a^(m-1) * (acceptance_rate(m-1) - 0.234)) if ( (m > a->eps_switch) && ( m * a->ar < a->m_switch) ) { @@ -234,8 +234,8 @@ int ssm_par_copy(ssm_par_t *dest, ssm_par_t *src) * Other caveat: D_J_p_X are in [N_DATA+1] ([0] contains the initial conditions) * select is in [N_DATA], times is in [N_DATA] */ -void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness) -{ + void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness) + { int j_sel; //the selected particle int n, nn, indn; @@ -252,9 +252,12 @@ void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data //print traj of ancestors of particle j_sel; - //!!! we assume that the last data point contain information' + //!!! 
below we assume that the last data point contains information' ssm_X_copy(D_X[data->n_obs], D_J_X[data->n_obs][j_sel]); + // take ancestor of last data point + j_sel = fitness->select[data->n_obs - 1][j_sel]; + //printing all ancestors up to previous observation time for(nn = (data->ind_nonan[data->n_obs_nonan-1]-1); nn > data->ind_nonan[data->n_obs_nonan-2]; nn--) { ssm_X_copy(D_X[nn + 1], D_J_X[nn + 1][j_sel]); @@ -263,9 +266,92 @@ for(n = (data->n_obs_nonan-2); n >= 1; n--) { //identifying index of the path that led to sampled particle indn = data->ind_nonan[n]; + ssm_X_copy(D_X[indn + 1], D_J_X[indn + 1][j_sel]); + j_sel = fitness->select[indn][j_sel]; + + //printing all ancestors up to previous observation time + for(nn= (indn-1); nn > data->ind_nonan[n-1]; nn--) { + ssm_X_copy(D_X[nn + 1], D_J_X[nn + 1][j_sel]); + } + } + + + indn = data->ind_nonan[0]; + ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); + + j_sel = fitness->select[indn][j_sel]; + + for(nn=indn; nn>=-1; nn--) { + ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); + } + +} + + +// this is a function used for debugging a bug in retrieving particle genealogy +// it's similar to ssm_sample_traj but takes the j_select as input +// j_select is returned by ssm_sample_traj_print2 and allows comparing the print and non-print versions of this function +// we keep it in case we need to debug it in the future +void ssm_sample_traj2(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness,const int j_select) +{ + int j_sel; //the selected particle + int n, nn, indn; + + double ran, cum_weights; + + ran=gsl_ran_flat(calc->randgsl, 0.0, 1.0); + + j_sel=0; + cum_weights=fitness->weights[0]; + + while (cum_weights < ran) { + cum_weights += fitness->weights[++j_sel]; + } + + j_sel = j_select; + + // /*test*/ + // FILE *test_file = fopen("/Users/Tonton/work/projects/hev-modelling/ssm/SIR_ssm/pmcmc/X_sampled_0.csv", "a"); + // if (test_file == NULL) + // { + // printf("Error opening file!\n"); + // exit(1); + // } + + // fprintf(test_file, "%i\n", j_sel); + // fclose(test_file); + // /*test*/ + + //print traj of ancestors of particle j_sel; + + //!!!
we assume that the last data point contains information' + fprintf(stderr, "data->n_obs = %i\n", data->n_obs); + ssm_X_copy(D_X[data->n_obs], D_J_X[data->n_obs][j_sel]); + + //printing all ancestors up to previous observation time + fprintf(stderr, "data->n_obs_nonan = %i\n", data->n_obs_nonan); + fprintf(stderr, "(data->ind_nonan[data->n_obs_nonan - 1] - 1) = %i\n", (data->ind_nonan[data->n_obs_nonan - 1] - 1)); + fprintf(stderr, "data->ind_nonan[data->n_obs_nonan-2] = %i\n", data->ind_nonan[data->n_obs_nonan-2]); + + // for(nn = 0; nn < data->n_obs_nonan; nn++){ + // fprintf(stderr, "data->ind_nonan[%i] = %i\n", nn, data->ind_nonan[nn]); + // } + + j_sel = fitness->select[data->n_obs - 1][j_sel]; + + for(nn = (data->ind_nonan[data->n_obs_nonan - 1] - 1); nn > data->ind_nonan[data->n_obs_nonan-2]; nn--) { + fprintf(stderr, "nn = %i\n", nn); + ssm_X_copy(D_X[nn+1], D_J_X[nn+1][j_sel]); + } + + for(n = (data->n_obs_nonan-2); n >= 1; n--) { + //identifying index of the path that led to sampled particle + indn = data->ind_nonan[n]; ssm_X_copy(D_X[indn + 1], D_J_X[indn + 1][j_sel]); + j_sel = fitness->select[indn][j_sel]; + //printing all ancestors up to previous observation time for(nn= (indn-1); nn > data->ind_nonan[n-1]; nn--) { ssm_X_copy(D_X[nn + 1], D_J_X[nn + 1][j_sel]); @@ -273,12 +359,18 @@ void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data } indn = data->ind_nonan[0]; + ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); + j_sel = fitness->select[indn][j_sel]; - for(nn=indn; nn>=0; nn--) { + for(nn=indn; nn>=-1; nn--) { ssm_X_copy(D_X[nn + 1], D_J_X[ nn + 1 ][j_sel]); } + // j_sel = fitness->select[indn][j_sel]; + + //TODO nn=-1 (for initial conditions) } + diff --git a/src/C/core/fitness.c b/src/C/core/fitness.c index d9f1b11..08f0383 100644 --- a/src/C/core/fitness.c +++ b/src/C/core/fitness.c @@ -24,11 +24,12 @@ * scale by making sure that everything below fitness->log_like_min * is fitness->log_like_min*row->ts_nonan_length.
*/ -double ssm_sanitize_log_likelihood(double log_like, ssm_row_t *row, ssm_fitness_t *fitness, ssm_nav_t *nav) -{ + double ssm_sanitize_log_likelihood(double log_like, ssm_row_t *row, ssm_fitness_t *fitness, ssm_nav_t *nav) + { if ((isinf(log_like)==1) || (isnan(log_like)==1) ) { //error if (nav->print & SSM_PRINT_WARNING) { ssm_print_warning("error likelihood computation"); + // fprintf(stderr, "at time %u\n", row->time); } return fitness->log_like_min * row->ts_nonan_length; } else { @@ -88,17 +89,17 @@ void ssm_dic_init(ssm_fitness_t *fitness, double log_like, double log_prior) void ssm_dic_update(ssm_fitness_t *fitness, double log_like, double log_prior) { if( log_like > fitness->summary_log_likelihood){ - fitness->summary_log_likelihood = log_like; - } - if( (log_like+log_prior) > fitness->summary_log_ltp){ - fitness->summary_log_ltp = (log_like+log_prior); - } - - double deviance = -2*log_like; - fitness->_deviance_cum += deviance; - if( deviance < fitness->_min_deviance){ - fitness->_min_deviance = deviance; - } + fitness->summary_log_likelihood = log_like; + } + if( (log_like+log_prior) > fitness->summary_log_ltp){ + fitness->summary_log_ltp = (log_like+log_prior); + } + + double deviance = -2*log_like; + fitness->_deviance_cum += deviance; + if( deviance < fitness->_min_deviance){ + fitness->_min_deviance = deviance; + } } void ssm_dic_end(ssm_fitness_t *fitness, ssm_nav_t *nav, int m) diff --git a/src/C/core/print.c b/src/C/core/print.c index f1a3510..29b464e 100644 --- a/src/C/core/print.c +++ b/src/C/core/print.c @@ -18,8 +18,8 @@ #include "ssm.h" -void ssm_print_log(char *data) -{ + void ssm_print_log(char *data) + { #if SSM_JSON json_t *root; root = json_pack("{s,s,s,s}", "id", "log", "data", data); @@ -88,79 +88,79 @@ void ssm_pipe_theta(FILE *stream, json_t *jparameters, ssm_theta_t *theta, ssm_v } else if ((strcmp(name, "covariance") == 0)) { jcovariance = el; - } else if ((strcmp(name, "summary") == 0)) { - jsummary = el; - } - } - - json_t *jsummarydata = json_object(); - json_object_set_new(jsummarydata, "id", json_integer(opts->id)); - json_object_set_new(jsummarydata, "AIC", isnan(fitness->AIC) ? json_null(): json_real(fitness->AIC)); - json_object_set_new(jsummarydata, "AICc", isnan(fitness->AICc) ? json_null(): json_real(fitness->AICc)); - json_object_set_new(jsummarydata, "DIC", isnan(fitness->DIC) ? json_null(): json_real(fitness->DIC)); - json_object_set_new(jsummarydata, "log_likelihood", isnan(fitness->summary_log_likelihood) ? json_null(): json_real(fitness->summary_log_likelihood)); - json_object_set_new(jsummarydata, "log_ltp", isnan(fitness->summary_log_ltp) ? json_null(): json_real(fitness->summary_log_ltp)); - json_object_set_new(jsummarydata, "sum_squares", isnan(fitness->summary_sum_squares) ? 
json_null(): json_real(fitness->summary_sum_squares)); - json_object_set_new(jsummarydata, "n_parameters", json_integer(nav->theta_all->length)); - json_object_set_new(jsummarydata, "n_data", json_integer(fitness->n)); - - if(!jsummary){ - json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "summary", "data", jsummarydata)); - } else{ - json_object_set_new(jsummary, "data", jsummarydata); - } - - if(var){ - json_t *jdata = json_object(); - - for(i=0; i<nav->theta_all->length; i++){ - json_t *jrow = json_object(); - for(j=0; j<nav->theta_all->length; j++){ - x = gsl_matrix_get(var, nav->theta_all->p[i]->offset_theta, nav->theta_all->p[j]->offset_theta); - if(x){ - json_object_set_new(jrow, nav->theta_all->p[j]->name, json_real(x)); - } - } - if(json_object_size(jrow)){ - json_object_set_new(jdata, nav->theta_all->p[i]->name, jrow); - } else { - json_decref(jrow); + } else if ((strcmp(name, "summary") == 0)) { + jsummary = el; + } + } + + json_t *jsummarydata = json_object(); + json_object_set_new(jsummarydata, "id", json_integer(opts->id)); + json_object_set_new(jsummarydata, "AIC", isnan(fitness->AIC) ? json_null(): json_real(fitness->AIC)); + json_object_set_new(jsummarydata, "AICc", isnan(fitness->AICc) ? json_null(): json_real(fitness->AICc)); + json_object_set_new(jsummarydata, "DIC", isnan(fitness->DIC) ? json_null(): json_real(fitness->DIC)); + json_object_set_new(jsummarydata, "log_likelihood", isnan(fitness->summary_log_likelihood) ? json_null(): json_real(fitness->summary_log_likelihood)); + json_object_set_new(jsummarydata, "log_ltp", isnan(fitness->summary_log_ltp) ? json_null(): json_real(fitness->summary_log_ltp)); + json_object_set_new(jsummarydata, "sum_squares", isnan(fitness->summary_sum_squares) ? json_null(): json_real(fitness->summary_sum_squares)); + json_object_set_new(jsummarydata, "n_parameters", json_integer(nav->theta_all->length)); + json_object_set_new(jsummarydata, "n_data", json_integer(fitness->n)); + + if(!jsummary){ + json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "summary", "data", jsummarydata)); + } else{ + json_object_set_new(jsummary, "data", jsummarydata); + } + + if(var){ + json_t *jdata = json_object(); + + for(i=0; i<nav->theta_all->length; i++){ + json_t *jrow = json_object(); + for(j=0; j<nav->theta_all->length; j++){ + x = gsl_matrix_get(var, nav->theta_all->p[i]->offset_theta, nav->theta_all->p[j]->offset_theta); + if(x){ + json_object_set_new(jrow, nav->theta_all->p[j]->name, json_real(x)); } } - - if(json_object_size(jdata)){ - if(!jcovariance){ - json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "covariance", "data", jdata)); - } else{ - json_object_set_new(jcovariance, "data", jdata); - } + if(json_object_size(jrow)){ + json_object_set_new(jdata, nav->theta_all->p[i]->name, jrow); } else { - json_decref(jdata); + json_decref(jrow); } - } - - if(strcmp(opts->next, "") != 0){ + } + + if(json_object_size(jdata)){ + if(!jcovariance){ + json_array_append_new(jresources, json_pack("{s,s,s,o}", "name", "covariance", "data", jdata)); + } else{ + json_object_set_new(jcovariance, "data", jdata); + } + } else { + json_decref(jdata); + } +} + +if(strcmp(opts->next, "") != 0){ char path[SSM_STR_BUFFSIZE]; snprintf(path, SSM_STR_BUFFSIZE, "%s/%s%d.json", opts->root, opts->next, opts->id); json_dump_file(jparameters, path, JSON_INDENT(2)); - } else { +} else { json_dumpf(jparameters, stdout, JSON_COMPACT); printf("\n"); fflush(stdout); - } +} } /** * remove summary (if any) and pipe hat.
 * This is typically used for simulations
 */
-void ssm_pipe_hat(FILE *stream, json_t *jparameters, ssm_input_t *input, ssm_hat_t *hat, ssm_par_t *par, ssm_calc_t *calc, ssm_nav_t *nav, ssm_options_t *opts, double t)
-{
+ void ssm_pipe_hat(FILE *stream, json_t *jparameters, ssm_input_t *input, ssm_hat_t *hat, ssm_par_t *par, ssm_calc_t *calc, ssm_nav_t *nav, ssm_options_t *opts, double t)
+ {
     int i, index;
     double x;
     json_t *jresources = json_object_get(jparameters, "resources");
     json_t *jsummary = NULL;
     int index_summary;
-
+
     for(index=0; index< json_array_size(jresources); index++){
         json_t *el = json_array_get(jresources, index);

@@ -173,23 +173,23 @@ void ssm_pipe_hat(FILE *stream, json_t *jparameters, ssm_input_t *input, ssm_hat
                 json_object_set_new(values, nav->theta_all->p[i]->name, json_real(x));
             }
         } else if (strcmp(name, "summary") == 0){
-            jsummary = el;
-            index_summary = index;
-        }
-    }
-
-    if(jsummary){
-        json_array_remove(jresources, index_summary);
-    }
-
-    if(strcmp(opts->next, "") != 0){
-        char path[SSM_STR_BUFFSIZE];
-        snprintf(path, SSM_STR_BUFFSIZE, "%s/%s%d.json", opts->root, opts->next, opts->id);
-        json_dump_file(jparameters, path, JSON_INDENT(2));
-    } else {
-        json_dumpf(jparameters, stdout, JSON_COMPACT); printf("\n");
-        fflush(stdout);
-    }
+            jsummary = el;
+            index_summary = index;
+        }
+    }
+
+    if(jsummary){
+        json_array_remove(jresources, index_summary);
+    }
+
+    if(strcmp(opts->next, "") != 0){
+        char path[SSM_STR_BUFFSIZE];
+        snprintf(path, SSM_STR_BUFFSIZE, "%s/%s%d.json", opts->root, opts->next, opts->id);
+        json_dump_file(jparameters, path, JSON_INDENT(2));
+    } else {
+        json_dumpf(jparameters, stdout, JSON_COMPACT); printf("\n");
+        fflush(stdout);
+    }
 }

@@ -264,7 +264,7 @@ void ssm_print_X(FILE *stream, ssm_X_t *p_X, ssm_par_t *par, ssm_nav_t *nav, ssm
 #endif
     }

-    for(i=0; i<nav->observed_length; i++){
+    for(i=0; i<nav->observed_length; i++){
         observed = nav->observed[i];
 #if SSM_JSON
         json_object_set_new(jout, observed->name, json_real(observed->f_obs_mean(p_X, par, calc, t)));
@@ -273,8 +273,8 @@ void ssm_print_X(FILE *stream, ssm_X_t *p_X, ssm_par_t *par, ssm_nav_t *nav, ssm
 #endif
     }

-    char key[SSM_STR_BUFFSIZE];
-    for(i=0; i<nav->observed_length; i++){
+    char key[SSM_STR_BUFFSIZE];
+    for(i=0; i<nav->observed_length; i++){
         observed = nav->observed[i];
         snprintf(key, SSM_STR_BUFFSIZE, "ran_%s", observed->name);
 #if SSM_JSON
@@ -295,20 +295,20 @@ void ssm_print_X(FILE *stream, ssm_X_t *p_X, ssm_par_t *par, ssm_nav_t *nav, ssm

-void ssm_print_header_trace(FILE *stream, ssm_nav_t *nav)
-{
-    int i;
-    for(i=0; i < nav->theta_all->length; i++) {
-        fprintf(stream, "%s,", nav->theta_all->p[i]->name);
-    }
-    fprintf(stream, "fitness,index\n");
-}
+ void ssm_print_header_trace(FILE *stream, ssm_nav_t *nav)
+ {
+    int i;
+    for(i=0; i < nav->theta_all->length; i++) {
+        fprintf(stream, "%s,", nav->theta_all->p[i]->name);
+    }
+    fprintf(stream, "fitness,index\n");
+ }

 /**
  * fitness is either log likelihood or sum of square
  */
-void ssm_print_trace(FILE *stream, ssm_theta_t *theta, ssm_nav_t *nav, const double fitness, const int index)
-{
+ void ssm_print_trace(FILE *stream, ssm_theta_t *theta, ssm_nav_t *nav, const double fitness, const int index)
+ {
     int i;
     ssm_parameter_t *parameter;

@@ -409,37 +409,37 @@ void ssm_print_pred_res(FILE *stream, ssm_X_t **J_X, ssm_par_t *par, ssm_nav_t *
             var_obs += observed->f_obs_var(J_X[j], par, calc, t);
         }

-        if(fitness->J > 1){
-            var_state = M2/(kn - 1.0);
-        } else {
-            var_state = 0;
-        }
+        if(fitness->J > 1){
+            var_state = M2/(kn - 1.0);
+        } else {
+            var_state = 0;
+        }

-        var_obs /= ((double) fitness->J);
+        var_obs /= ((double) fitness->J);

-        res = (y - pred)/sqrt(var_state + var_obs);
-    }
+        res = (y - pred)/sqrt(var_state + var_obs);
+    }

 #if SSM_JSON
-        snprintf(key, SSM_STR_BUFFSIZE, "pred_%s", observed->name);
-        json_object_set_new(jout, key, json_real(pred));
+        snprintf(key, SSM_STR_BUFFSIZE, "pred_%s", observed->name);
+        json_object_set_new(jout, key, json_real(pred));

-        snprintf(key, SSM_STR_BUFFSIZE, "res_%s", observed->name);
-        json_object_set_new(jout, key, (isnan(res)==1)? json_null(): json_real(res));
+        snprintf(key, SSM_STR_BUFFSIZE, "res_%s", observed->name);
+        json_object_set_new(jout, key, (isnan(res)==1)? json_null(): json_real(res));
 #else
-        tmp_pred[observed->offset] = pred;
-        tmp_res[observed->offset] = res;
+        tmp_pred[observed->offset] = pred;
+        tmp_res[observed->offset] = res;
 #endif
-    }
+    }

 #if SSM_JSON
-    json_object_set_new(jout, "ess", (isnan(fitness->ess_n)==1)? json_null(): json_real(fitness->ess_n));
-    ssm_json_dumpf(stream, "predres", jout);
+    json_object_set_new(jout, "ess", (isnan(fitness->ess_n)==1)? json_null(): json_real(fitness->ess_n));
+    ssm_json_dumpf(stream, "predres", jout);
 #else
-    for(ts=0; ts<nav->ts_length; ts++){
-        fprintf(stream, "%g,%g,", tmp_pred[ts], tmp_res[ts]);
-    }
-    fprintf(stream, "%g\n", fitness->ess_n);
+    for(ts=0; ts<nav->ts_length; ts++){
+        fprintf(stream, "%g,%g,", tmp_pred[ts], tmp_res[ts]);
+}
+fprintf(stream, "%g\n", fitness->ess_n);
 #endif
 }

@@ -463,10 +463,10 @@ void ssm_print_header_hat(FILE *stream, ssm_nav_t *nav)
     }

     for(i=0; i<nav->observed_length; i++){
-        fprintf(stream, ",mean_%s,lower_%s,upper_%s", nav->observed[i]->name, nav->observed[i]->name, nav->observed[i]->name);
-    }
+        fprintf(stream, ",mean_%s,lower_%s,upper_%s", nav->observed[i]->name, nav->observed[i]->name, nav->observed[i]->name);
+    }

-    fprintf(stream, "\n");
+    fprintf(stream, "\n");
 }

@@ -563,8 +563,8 @@ void ssm_print_hat(FILE *stream, ssm_hat_t *hat, ssm_nav_t *nav, ssm_row_t *row)
  * Other caveat: D_J_p_X are in [N_DATA+1] ([0] contains the initial conditions)
  * select is in [N_DATA], times is in [N_DATA]
  */
-void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index)
-{
+ void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index)
+ {
     int j_sel;
     int n, nn, indn;

@@ -622,6 +622,73 @@ void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_n
 }

+// this is a function used for debugging a bug in retrieving the particle genealogy
+// it's similar to ssm_sample_traj but prints the sampled particle
+// it's similar to ssm_sample_traj_print but returns j_sel_start
+// j_sel_start is then used by ssm_sample_traj2, which allows comparing the print and non-print versions of this function
+// we keep it in case we need to debug this in the future
+int ssm_sample_traj_print2(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index)
+{
+    int j_sel, j_sel_start;
+    int n, nn, indn;
+
+    double ran, cum_weights;
+
+    ssm_X_t *X_sel;
+
+    ran=gsl_ran_flat(calc->randgsl, 0.0, 1.0);
+
+    j_sel=0;
+    cum_weights=fitness->weights[0];
+
+    while (cum_weights < ran) {
+        cum_weights += fitness->weights[++j_sel];
+    }
+
+    j_sel_start = j_sel;
+
+    //print traj of ancestors of particle j_sel
+
+    //!!! we assume that the last data point contains information
+    X_sel = D_J_X[data->n_obs][j_sel]; // N_DATA-1 <=> data->indn_data_nonan[N_DATA_NONAN-1]
+    ssm_print_X(stream, X_sel, par, nav, calc, data->rows[data->n_obs-1], index);
+
+    //printing all ancestors up to the previous observation time
+    for(nn = (data->ind_nonan[data->n_obs_nonan-1]-1); nn > data->ind_nonan[data->n_obs_nonan-2]; nn--) {
+        X_sel = D_J_X[ nn + 1 ][j_sel];
+        ssm_print_X(stream, X_sel, par, nav, calc, data->rows[nn], index);
+    }
+
+    for(n = (data->n_obs_nonan-2); n >= 1; n--) {
+        //identifying the index of the path that led to the sampled particle
+        indn = data->ind_nonan[n];
+        j_sel = fitness->select[indn][j_sel];
+        X_sel = D_J_X[ indn + 1 ][j_sel];
+
+        ssm_print_X(stream, X_sel, par, nav, calc, data->rows[indn], index);
+
+        //printing all ancestors up to the previous observation time
+        for(nn= (indn-1); nn > data->ind_nonan[n-1]; nn--) {
+            X_sel = D_J_X[ nn + 1 ][j_sel];
+            ssm_print_X(stream, X_sel, par, nav, calc, data->rows[nn], index);
+        }
+    }
+
+    indn = data->ind_nonan[0];
+    j_sel = fitness->select[indn][j_sel];
+    X_sel = D_J_X[indn+1][j_sel];
+
+    for(nn=indn; nn>=0; nn--) {
+        X_sel = D_J_X[ nn + 1 ][j_sel];
+        ssm_print_X(stream, X_sel, par, nav, calc, data->rows[nn], index);
+    }
+
+    //TODO nn=-1 (for initial conditions)
+
+    return(j_sel_start);
+
+}
+
 void ssm_print_header_ar(FILE *stream)
 {
     fprintf(stream, "ar,ar_smoothed,eps,index\n");
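The debugging helper `ssm_sample_traj_print2` above walks the particle genealogy backwards through the resampling indices stored in `fitness->select`. Stripped of ssm's data structures, the core of that walk looks like this minimal sketch (the `select` values and the array sizes are made-up illustration data):

```c
#include <stdio.h>

#define N_OBS  4   /* number of observation times (made up) */
#define N_PART 3   /* number of particles (made up) */

int main(void)
{
    /* select[n][j]: index of the ancestor of particle j across the
       resampling performed at observation n */
    int select[N_OBS][N_PART] = {{0, 0, 1}, {1, 2, 2}, {0, 1, 1}, {2, 2, 0}};

    int j = 1; /* particle sampled at the last observation */
    printf("n=%d -> particle %d\n", N_OBS - 1, j);

    /* walk back to the root, mirroring the j_sel = fitness->select[indn][j_sel]
       steps in ssm_sample_traj_print2 */
    for (int n = N_OBS - 2; n >= 0; n--) {
        j = select[n][j];
        printf("n=%d -> particle %d\n", n, j);
    }
    return 0;
}
```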
diff --git a/src/C/core/special_functions.c b/src/C/core/special_functions.c
index 43f73c4..4e701f3 100644
--- a/src/C/core/special_functions.c
+++ b/src/C/core/special_functions.c
@@ -46,3 +46,14 @@ double slowstep(double x, double d)
 {
     return ( x >= 0.0 ? ( x >= d ? d : x ) : 0.0 );
 }
+
+/**
+ * Sigmoid function: increases from 0 to 1 (for shape > 0; a negative shape gives a decreasing sigmoid).
+ * x is typically t
+ * shape controls the steepness of the sigmoid (the greater the steeper)
+ * shift controls how far from 0 the midpoint is shifted
+*/
+double sigmoid(double x, double shape, double shift)
+{
+    return ( 1 / ( 1 + exp( - shape * ( x - shift ) ) ) );
+}
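The new `sigmoid` helper (whitelisted in `Cmodel.py` further below, alongside `exp` and `log`) gives model builders a smooth switch, e.g. for a transmission rate that falls once an intervention kicks in. A self-contained sketch of how it behaves; `beta0`, `shape` and `shift` are made-up illustration values, not anything this patch defines:

```c
#include <stdio.h>
#include <math.h>

/* same definition as the helper added in src/C/core/special_functions.c */
static double sigmoid(double x, double shape, double shift)
{
    return 1.0 / (1.0 + exp(-shape * (x - shift)));
}

int main(void)
{
    /* hypothetical use: a transmission rate that drops around t = 50 */
    double beta0 = 0.6, shape = 0.3, shift = 50.0;
    for (int t = 0; t <= 100; t += 25) {
        double beta = beta0 * (1.0 - sigmoid((double) t, shape, shift));
        printf("t = %3d  beta = %f\n", t, beta);
    }
    return 0;
}
```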
diff --git a/src/C/core/ssm.h b/src/C/core/ssm.h
index d5d9c52..d2857ab 100644
--- a/src/C/core/ssm.h
+++ b/src/C/core/ssm.h
@@ -733,6 +733,7 @@ void ssm_print_pred_res(FILE *stream, ssm_X_t **J_X, ssm_par_t *par, ssm_nav_t *
 void ssm_print_header_hat(FILE *stream, ssm_nav_t *nav);
 void ssm_print_hat(FILE *stream, ssm_hat_t *hat, ssm_nav_t *nav, ssm_row_t *row);
 void ssm_sample_traj_print(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index);
+int ssm_sample_traj_print2(FILE *stream, ssm_X_t ***D_J_X, ssm_par_t *par, ssm_nav_t *nav, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int index);
 void ssm_print_header_ar(FILE *stream);
 void ssm_print_ar(FILE *stream, ssm_adapt_t *adapt, const int index);
@@ -751,6 +752,7 @@ void ssm_theta_ran(ssm_theta_t *proposed, ssm_theta_t *theta, ssm_var_t *var, do
 int ssm_theta_copy(ssm_theta_t *dest, ssm_theta_t *src);
 int ssm_par_copy(ssm_par_t *dest, ssm_par_t *src);
 void ssm_sample_traj(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness);
+void ssm_sample_traj2(ssm_X_t **D_X, ssm_X_t ***D_J_X, ssm_calc_t *calc, ssm_data_t *data, ssm_fitness_t *fitness, const int j_select);

 /* simplex.c */
 double ssm_simplex(ssm_theta_t *theta, ssm_var_t *var, void *params, double (*f_simplex)(const gsl_vector *x, void *params), ssm_nav_t *nav, ssm_options_t *opts);
@@ -764,7 +766,8 @@ void ssm_workers_stop(ssm_workers_t *workers);
 double heaviside(double x);
 double ramp(double x);
 double slowstep(double x, double d);
-
+double sigmoid(double x, double shape, double shift);
+
 /******************************/
 /* kalman function signatures */
 /******************************/
diff --git a/src/C/pmcmc/main_pmcmc.c b/src/C/pmcmc/main_pmcmc.c
index aa556cb..d4941e8 100644
--- a/src/C/pmcmc/main_pmcmc.c
+++ b/src/C/pmcmc/main_pmcmc.c
@@ -18,98 +18,98 @@

 #include "ssm.h"

-static ssm_err_code_t run_smc(ssm_err_code_t (*f_pred) (ssm_X_t *, double, double, ssm_par_t *, ssm_nav_t *, ssm_calc_t *), ssm_X_t ***D_J_X, ssm_X_t ***D_J_X_tmp, ssm_par_t *par, ssm_calc_t **calc, ssm_data_t *data, ssm_fitness_t *fitness, ssm_nav_t *nav, ssm_workers_t *workers)
-{
-    int i, j, n, np1, id, the_j;
-    double t0, t1;
-
-    fitness->log_like = 0.0;
-    fitness->log_prior = 0.0;
-    fitness->n_all_fail = 0;
-
-    for(j=0; j<fitness->J; j++){
-        fitness->cum_status[j] = SSM_SUCCESS;
-    }
-
-    for(n=0; n<data->n_obs; n++) {
-        np1 = n+1;
-        t0 = (n) ? data->rows[n-1]->time: 0;
-        t1 = data->rows[n]->time;
-
-        if(!workers->flag_tcp){
-            for(j=0; j<fitness->J; j++){
-                ssm_X_copy(D_J_X[np1][j], D_J_X[n][j]);
-            }
-        }
+ static ssm_err_code_t run_smc(ssm_err_code_t (*f_pred) (ssm_X_t *, double, double, ssm_par_t *, ssm_nav_t *, ssm_calc_t *), ssm_X_t ***D_J_X, ssm_X_t ***D_J_X_tmp, ssm_par_t *par, ssm_calc_t **calc, ssm_data_t *data, ssm_fitness_t *fitness, ssm_nav_t *nav, ssm_workers_t *workers)
+ {
+    int i, j, n, np1, id, the_j;
+    double t0, t1;
+
+    fitness->log_like = 0.0;
+    fitness->log_prior = 0.0;
+    fitness->n_all_fail = 0;
+
+    for(j=0; j<fitness->J; j++){
+        fitness->cum_status[j] = SSM_SUCCESS;
+    }
+
+    for(n=0; n<data->n_obs; n++) {
+        np1 = n+1;
+        t0 = (n) ? data->rows[n-1]->time: 0;
+        t1 = data->rows[n]->time;
+
+        if(!workers->flag_tcp){
+            for(j=0; j<fitness->J; j++){
+                ssm_X_copy(D_J_X[np1][j], D_J_X[n][j]);
+            }
+}

-        if(workers->flag_tcp){
+if(workers->flag_tcp){
             //send work
-            for (j=0;j<fitness->J;j++) {
-                zmq_send(workers->sender, &n, sizeof (int), ZMQ_SNDMORE);
-                ssm_zmq_send_par(workers->sender, par, ZMQ_SNDMORE);
+    for (j=0;j<fitness->J;j++) {
+        zmq_send(workers->sender, &n, sizeof (int), ZMQ_SNDMORE);
+        ssm_zmq_send_par(workers->sender, par, ZMQ_SNDMORE);

-                zmq_send(workers->sender, &j, sizeof (int), ZMQ_SNDMORE);
-                ssm_zmq_send_X(workers->sender, D_J_X[n][j], ZMQ_SNDMORE);
-                zmq_send(workers->sender, &(fitness->cum_status[j]), sizeof (ssm_err_code_t), 0);
-            }
+        zmq_send(workers->sender, &j, sizeof (int), ZMQ_SNDMORE);
+        ssm_zmq_send_X(workers->sender, D_J_X[n][j], ZMQ_SNDMORE);
+        zmq_send(workers->sender, &(fitness->cum_status[j]), sizeof (ssm_err_code_t), 0);
+}

             //get results from the workers
-            for (j=0; j<fitness->J; j++) {
-                zmq_recv(workers->receiver, &the_j, sizeof (int), 0);
-                ssm_zmq_recv_X(D_J_X[ np1 ][ the_j ], workers->receiver);
-                zmq_recv(workers->receiver, &(fitness->weights[the_j]), sizeof (double), 0);
-                zmq_recv(workers->receiver, &(fitness->cum_status[the_j]), sizeof (ssm_err_code_t), 0);
-            }
-
-        } else if(calc[0]->threads_length > 1){
+for (j=0; j<fitness->J; j++) {
+    zmq_recv(workers->receiver, &the_j, sizeof (int), 0);
+    ssm_zmq_recv_X(D_J_X[ np1 ][ the_j ], workers->receiver);
+    zmq_recv(workers->receiver, &(fitness->weights[the_j]), sizeof (double), 0);
+    zmq_recv(workers->receiver, &(fitness->cum_status[the_j]), sizeof (ssm_err_code_t), 0);
+}
+
+} else if(calc[0]->threads_length > 1){
             //send work
-            for (i=0; i<calc[0]->threads_length; i++) {
-                zmq_send(workers->sender, &i, sizeof (int), ZMQ_SNDMORE);
-                zmq_send(workers->sender, &n, sizeof (int), 0);
-            }
+    for (i=0; i<calc[0]->threads_length; i++) {
+        zmq_send(workers->sender, &i, sizeof (int), ZMQ_SNDMORE);
+        zmq_send(workers->sender, &n, sizeof (int), 0);
+    }

             //get results from the workers
-            for (i=0; i<calc[0]->threads_length; i++) {
-                zmq_recv(workers->receiver, &id, sizeof (int), 0);
-            }
-        } else {
-
-            for(j=0;j<fitness->J;j++) {
-                ssm_X_reset_inc(D_J_X[np1][j], data->rows[n], nav);
-                fitness->cum_status[j] |= (*f_pred)(D_J_X[np1][j], t0, t1, par, nav, calc[0]);
-                if(data->rows[n]->ts_nonan_length) {
-                    fitness->weights[j] = (fitness->cum_status[j] == SSM_SUCCESS) ? exp(ssm_log_likelihood(data->rows[n], D_J_X[np1][j], par, calc[0], nav, fitness)) : 0.0;
-                    fitness->cum_status[j] = SSM_SUCCESS;
-                }
-            }
-        }
-
-        if(data->rows[n]->ts_nonan_length) {
-            if(ssm_weight(fitness, data->rows[n], nav, n)) {
-                ssm_systematic_sampling(fitness, calc[0], n);
-            }
-            ssm_resample_X(fitness, &D_J_X[np1], &D_J_X_tmp[np1], n);
-        }
-    }
-    return ( (data->n_obs != 0) && (fitness->n_all_fail == data->n_obs) ) ? SSM_ERR_PRED: SSM_SUCCESS;
+    for (i=0; i<calc[0]->threads_length; i++) {
+        zmq_recv(workers->receiver, &id, sizeof (int), 0);
+    }
+} else {
+
+    for(j=0;j<fitness->J;j++) {
+        ssm_X_reset_inc(D_J_X[np1][j], data->rows[n], nav);
+        fitness->cum_status[j] |= (*f_pred)(D_J_X[np1][j], t0, t1, par, nav, calc[0]);
+        if(data->rows[n]->ts_nonan_length) {
+            fitness->weights[j] = (fitness->cum_status[j] == SSM_SUCCESS) ? exp(ssm_log_likelihood(data->rows[n], D_J_X[np1][j], par, calc[0], nav, fitness)) : 0.0;
+            fitness->cum_status[j] = SSM_SUCCESS;
+        }
+}
+}
+
+if(data->rows[n]->ts_nonan_length) {
+    if(ssm_weight(fitness, data->rows[n], nav, n)) {
+        ssm_systematic_sampling(fitness, calc[0], n);
+    }
+    ssm_resample_X(fitness, &D_J_X[np1], &D_J_X_tmp[np1], n);
+}
+}
+return ( (data->n_obs != 0) && (fitness->n_all_fail == data->n_obs) ) ? SSM_ERR_PRED: SSM_SUCCESS;
 }

 int main(int argc, char *argv[])
 {
-    char str[SSM_STR_BUFFSIZE];
+    char str[SSM_STR_BUFFSIZE];

-    ssm_options_t *opts = ssm_options_new();
-    ssm_options_load(opts, SSM_PMCMC, argc, argv);
+    ssm_options_t *opts = ssm_options_new();
+    ssm_options_load(opts, SSM_PMCMC, argc, argv);

-    json_t *jparameters = ssm_load_json_stream(stdin);
-    json_t *jdata = ssm_load_data(opts);
+    json_t *jparameters = ssm_load_json_stream(stdin);
+    json_t *jdata = ssm_load_data(opts);

-    ssm_nav_t *nav = ssm_nav_new(jparameters, opts);
-    ssm_data_t *data = ssm_data_new(jdata, nav, opts);
-    ssm_fitness_t *fitness = ssm_fitness_new(data, opts);
-    ssm_calc_t **calc = ssm_N_calc_new(jdata, nav, data, fitness, opts);
-    ssm_X_t ***D_J_X = ssm_D_J_X_new(data, fitness, nav, opts);
-    ssm_X_t ***D_J_X_tmp = ssm_D_J_X_new(data, fitness, nav, opts);
+    ssm_nav_t *nav = ssm_nav_new(jparameters, opts);
+    ssm_data_t *data = ssm_data_new(jdata, nav, opts);
+    ssm_fitness_t *fitness = ssm_fitness_new(data, opts);
+    ssm_calc_t **calc = ssm_N_calc_new(jdata, nav, data, fitness, opts);
+    ssm_X_t ***D_J_X = ssm_D_J_X_new(data, fitness, nav, opts);
+    ssm_X_t ***D_J_X_tmp = ssm_D_J_X_new(data, fitness, nav, opts);

     ssm_X_t **D_X = ssm_D_X_new(data, nav, opts); //to store sampled trajectories
     ssm_X_t **D_X_prev = ssm_D_X_new(data, nav, opts);
@@ -131,25 +131,66 @@ int main(int argc, char *argv[])

     ssm_f_pred_t f_pred = ssm_get_f_pred(nav);

-    ssm_workers_t *workers = ssm_workers_start(D_J_X, &par, data, calc, fitness, f_pred, nav, opts, SSM_WORKER_D_X | SSM_WORKER_FITNESS);
+    ssm_workers_t *workers = ssm_workers_start(D_J_X, &par_proposed, data, calc, fitness, f_pred, nav, opts, SSM_WORKER_D_X | SSM_WORKER_FITNESS);
+
+
+// test
+    // snprintf(str, SSM_STR_BUFFSIZE, "%s/X_sampled_%d.csv", opts->root, opts->id);
+    // FILE *sampled_traj_file = fopen(str, "w");
+    // if (sampled_traj_file == NULL)
+    // {
+    //     printf("Error opening file!\n");
+    //     exit(1);
+    // }
+    // ssm_print_header_X(sampled_traj_file, nav);
+
+    // snprintf(str, SSM_STR_BUFFSIZE, "%s/X_particles_%d.csv", opts->root, opts->id);
+    // FILE *part_traj_file = fopen(str, "w");
+    // if (part_traj_file == NULL)
+    // {
+    //     printf("Error opening file!\n");
+    //     exit(1);
+    // }
+    // ssm_print_header_X(part_traj_file, nav);
+
+    // snprintf(str, SSM_STR_BUFFSIZE, "%s/select_%d.csv", opts->root, opts->id);
+    // FILE *select_file = fopen(str, "w");
+    // if (select_file == NULL)
+    // {
+    //     printf("Error opening file!\n");
+    //     exit(1);
+    // }
+    // fprintf(select_file, "data\tparticle\tancestor\n");
+
+    // snprintf(str, SSM_STR_BUFFSIZE, "%s/part_sampled_%d.csv", opts->root, opts->id);
+    // FILE *sampled_part_file = fopen(str, "w");
+    // if (sampled_part_file == NULL)
+    // {
+    //     printf("Error opening file!\n");
+    //     exit(1);
+    // }
+    /*test*/
+
+// test

     /////////////////////////
     // initialization step //
     /////////////////////////
     int j, n;
+    // int j_select;
     int m = 0;

     ssm_par2X(D_J_X[0][0], par, calc[0], nav);
     for(j=1; j<fitness->J; j++){
-        ssm_X_copy(D_J_X[0][j], D_J_X[0][0]);
+        ssm_X_copy(D_J_X[0][j], D_J_X[0][0]);
     }

     ssm_err_code_t success = run_smc(f_pred, D_J_X, D_J_X_tmp, par_proposed, calc, data, fitness, nav, workers);
     success |= ssm_log_prob_prior(&fitness->log_prior, proposed, nav, fitness);

     if(success != SSM_SUCCESS){
-        ssm_print_err("epic fail, initialization step failed");
-        exit(EXIT_FAILURE);
+        ssm_print_err("epic fail, initialization step failed");
+        exit(EXIT_FAILURE);
     }

     //the first run is accepted
@@ -157,116 +198,140 @@ int main(int argc, char *argv[])
     fitness->log_prior_prev = fitness->log_prior;

     if ( ( nav->print & SSM_PRINT_X ) && data->n_obs ) {
+
         ssm_sample_traj(D_X, D_J_X, calc[0], data, fitness);
-        for(n=0; n<data->n_obs; n++){
-            ssm_X_copy(D_X_prev[n+1], D_X[n+1]);
-            ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m);
-        }
+
+        // j_select = ssm_sample_traj_print2(sampled_traj_file, D_J_X, par, nav, calc[0], data, fitness, m);
+        // ssm_sample_traj2(D_X, D_J_X, calc[0], data, fitness, j_select);
+
+        // fprintf(sampled_part_file, "%i\n", j_select);
+        // create a file, print all particle trajectories (D_J_X)
+        // for(j=0; j<fitness->J; j++){
+        //     for(n=0; n<data->n_obs; n++){
+        //         ssm_print_X(part_traj_file, D_J_X[n+1][j], par, nav, calc[0], data->rows[n], j);
+        //         fprintf(select_file, "%i\t%i\t%i\n", n, j, fitness->select[n][j]);
+        //     }
+        // }
+        // create another file and print the resampled trajectory (D_X)
+        // actually this is already printed in X_0.csv
+        // try to understand where the error in the resampling is
+
+        for(n=0; n<data->n_obs; n++){
+            ssm_X_copy(D_X_prev[n+1], D_X[n+1]);
+            ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m);
+        }
+
     }

     if(nav->print & SSM_PRINT_TRACE){
-        ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m);
+        ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m);
     }

     ssm_dic_init(fitness, fitness->log_like_prev, fitness->log_prior_prev);

     if (nav->print & SSM_PRINT_LOG) {
-        snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar);
-        ssm_print_log(str);
-    }
+        snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar);
+        ssm_print_log(str);
+    }
rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); + ssm_print_log(str); + } //////////////// // iterations // //////////////// - double sd_fac; - double ratio; - for(m=1; mdt = D_J_X[0][0]->dt0; - for(j=1; jJ; j++){ - ssm_X_copy(D_J_X[0][j], D_J_X[0][0]); - } + double sd_fac; + double ratio; + for(m=1; mdt = D_J_X[0][0]->dt0; + for(j=1; jJ; j++){ + ssm_X_copy(D_J_X[0][j], D_J_X[0][0]); + } - success |= run_smc(f_pred, D_J_X, D_J_X_tmp, par_proposed, calc, data, fitness, nav, workers); - success |= ssm_metropolis_hastings(fitness, &ratio, proposed, theta, var, sd_fac, nav, calc[0], 1); - } + success |= run_smc(f_pred, D_J_X, D_J_X_tmp, par_proposed, calc, data, fitness, nav, workers); + success |= ssm_metropolis_hastings(fitness, &ratio, proposed, theta, var, sd_fac, nav, calc[0], 1); + } if(success == SSM_SUCCESS){ //everything went well and the proposed theta was accepted - fitness->log_like_prev = fitness->log_like; - fitness->log_prior_prev = fitness->log_prior; - ssm_theta_copy(theta, proposed); - ssm_par_copy(par, par_proposed); - - if ( (nav->print & SSM_PRINT_X) && data->n_obs ) { - ssm_sample_traj(D_X, D_J_X, calc[0], data, fitness); - for(n=0; nn_obs; n++){ - ssm_X_copy(D_X_prev[n+1], D_X[n+1]); - } + fitness->log_like_prev = fitness->log_like; + fitness->log_prior_prev = fitness->log_prior; + ssm_theta_copy(theta, proposed); + ssm_par_copy(par, par_proposed); + + if ( (nav->print & SSM_PRINT_X) && data->n_obs ) { + ssm_sample_traj(D_X, D_J_X, calc[0], data, fitness); + for(n=0; nn_obs; n++){ + ssm_X_copy(D_X_prev[n+1], D_X[n+1]); } + } } ssm_adapt_ar(adapt, (success == SSM_SUCCESS) ? 1: 0, m); //compute acceptance rate ssm_adapt_var(adapt, theta, m); //compute empirical variance if ( (nav->print & SSM_PRINT_X) && ( (m % thin_traj) == 0) ) { - for(n=0; nn_obs; n++){ - ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m); - } + for(n=0; nn_obs; n++){ + ssm_print_X(nav->X, D_X_prev[n+1], par, nav, calc[0], data->rows[n], m); + } } if (nav->print & SSM_PRINT_TRACE){ - ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m); + ssm_print_trace(nav->trace, theta, nav, fitness->log_like_prev + fitness->log_prior_prev, m); } - ssm_dic_update(fitness, fitness->log_like_prev, fitness->log_prior_prev); + ssm_dic_update(fitness, fitness->log_like_prev, fitness->log_prior_prev); if (nav->print & SSM_PRINT_DIAG) { - ssm_print_ar(nav->diag, adapt, m); + ssm_print_ar(nav->diag, adapt, m); } - if (nav->print & SSM_PRINT_LOG) { - snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); - ssm_print_log(str); - } - } + if (nav->print & SSM_PRINT_LOG) { + snprintf(str, SSM_STR_BUFFSIZE, "%d\t logLike.: %g\t accepted: %d\t acc. 
rate: %g", m, fitness->log_like_prev + fitness->log_prior_prev, !(success & SSM_MH_REJECT), adapt->ar); + ssm_print_log(str); + } + } - if (!(nav->print & SSM_PRINT_LOG)) { - ssm_dic_end(fitness, nav, m); - ssm_pipe_theta(stdout, jparameters, theta, var, fitness, nav, opts); - } + if (!(nav->print & SSM_PRINT_LOG)) { + ssm_dic_end(fitness, nav, m); + ssm_pipe_theta(stdout, jparameters, theta, var, fitness, nav, opts); + } - json_decref(jparameters); + // test + // fclose(sampled_traj_file); + // fclose(part_traj_file); + // fclose(select_file); + // fclose(sampled_part_file); +// test + json_decref(jparameters); - ssm_workers_stop(workers); + ssm_workers_stop(workers); - ssm_D_J_X_free(D_J_X, data, fitness); - ssm_D_J_X_free(D_J_X_tmp, data, fitness); - ssm_D_X_free(D_X, data); - ssm_D_X_free(D_X_prev, data); + ssm_D_J_X_free(D_J_X, data, fitness); + ssm_D_J_X_free(D_J_X_tmp, data, fitness); + ssm_D_X_free(D_X, data); + ssm_D_X_free(D_X_prev, data); - ssm_N_calc_free(calc, nav); + ssm_N_calc_free(calc, nav); - ssm_data_free(data); - ssm_nav_free(nav); + ssm_data_free(data); + ssm_nav_free(nav); - ssm_fitness_free(fitness); + ssm_fitness_free(fitness); - ssm_input_free(input); - ssm_par_free(par_proposed); - ssm_par_free(par); + ssm_input_free(input); + ssm_par_free(par_proposed); + ssm_par_free(par); - ssm_theta_free(theta); - ssm_theta_free(proposed); - ssm_var_free(var_input); - ssm_adapt_free(adapt); + ssm_theta_free(theta); + ssm_theta_free(proposed); + ssm_var_free(var_input); + ssm_adapt_free(adapt); - return 0; -} + return 0; + } diff --git a/src/C/templates/observed_template.c b/src/C/templates/observed_template.c index 19c4263..f557f14 100644 --- a/src/C/templates/observed_template.c +++ b/src/C/templates/observed_template.c @@ -8,10 +8,12 @@ static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *p { double like; double *X = p_X->proj; + + {% if x.distribution == 'discretized_normal' %} + double gsl_mu = {{ x.mean }}; double gsl_sd = {{ x.sd }}; - // printf("mu %f sd %f y %f\n",gsl_mu, gsl_sd,y); if (y > 0.0) { like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd) - gsl_cdf_gaussian_P(y - 0.5 - gsl_mu, gsl_sd); @@ -19,9 +21,25 @@ static double f_likelihood_tpl_{{ x.name }}(double y, ssm_X_t *p_X, ssm_par_t *p like = gsl_cdf_gaussian_P(y + 0.5 - gsl_mu, gsl_sd); } + {% elif x.distribution == 'poisson' %} + + double gsl_mu = {{ x.mean }}; + + like = gsl_ran_poisson_pdf(rint(y), gsl_mu); + + {% elif x.distribution == 'binomial' %} + + double gsl_p = {{ x.p }}; + double gsl_n = {{ x.n }}; + + like = gsl_ran_binomial_pdf(rint(y), gsl_p, gsl_n); + + {% endif %} + return like; } + static double f_obs_mean_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t) { double *X = p_X->proj; @@ -34,15 +52,36 @@ static double f_obs_var_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_ return pow({{ x.sd }}, 2); } + static double f_obs_ran_tpl_{{ x.name }}(ssm_X_t *p_X, ssm_par_t *par, ssm_calc_t *calc, double t) { double *X = p_X->proj; + + {% if x.distribution == 'discretized_normal' %} + double gsl_mu = {{ x.mean }}; double gsl_sd = {{ x.sd }}; - double yobs= gsl_mu+gsl_ran_gaussian(calc->randgsl, gsl_sd); + double yobs = gsl_mu + gsl_ran_gaussian(calc->randgsl, gsl_sd); + + yobs = (yobs >0) ? 
diff --git a/src/Ccoder.py b/src/Ccoder.py
index 99cd4bc..aa0b5d6 100755
--- a/src/Ccoder.py
+++ b/src/Ccoder.py
@@ -95,6 +95,11 @@ def parameters(self):
         pdict = {x['name']:x for x in parameters}
         sdict = {'diff__' + x['name']: x for x in drifts}

+        for x in pars:
+            if x not in pdict:
+                raise SsmError('An input is missing for parameter %s' %x)
+
+
         f_remainders = {}
         f_remainders_par = {}
         f_remainders_var = {}
@@ -138,14 +143,21 @@ def parameters(self):

     def observed(self):
-        ##WARNING right now only the discretized normal is supported.
-        ##TODO: generalization for different distribution
         obs = copy.deepcopy(self.obs_model)
-
+
         for x in obs:
-            x['mean'] = self.make_C_term(x['mean'], True)
-            x['sd'] = self.make_C_term(x['sd'], True)
+            if x['distribution'] == 'discretized_normal':
+                x['mean'] = self.make_C_term(x['mean'], True)
+                x['sd'] = self.make_C_term(x['sd'], True)
+            elif x['distribution'] == 'poisson':
+                x['mean'] = self.make_C_term(x['mean'], True)
+                x['sd'] = self.make_C_term(x['sd'], True)
+            elif x['distribution'] == 'binomial':
+                x['mean'] = self.make_C_term(x['mean'], True)
+                x['sd'] = self.make_C_term(x['sd'], True)
+                x['p'] = self.make_C_term(x['p'], True)
+                x['n'] = self.make_C_term(x['n'], True)

         return {'observed': obs}
diff --git a/src/Cmodel.py b/src/Cmodel.py
index 70305b6..397d9de 100755
--- a/src/Cmodel.py
+++ b/src/Cmodel.py
@@ -43,7 +43,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs):

         self.op = set(['+', '-', '*', '/', ',', '(', ')']) ##!!!CAN'T contain square bracket '[' ']'
         self.reserved = set(['U', 'x', 't', 'E', 'LN2', 'LN10','LOG2E', 'LOG10E', 'PI', 'SQRT1_2', 'SQRT2']) #JS Math Global Object
-        self.special_functions = set(['terms_forcing', 'heaviside', 'ramp', 'slowstep', 'sin', 'cos', 'correct_rate', 'ssm_correct_rate', 'sqrt', 'pow'])
+        self.special_functions = set(['terms_forcing', 'heaviside', 'ramp', 'slowstep', 'sigmoid', 'sin', 'cos', 'correct_rate', 'ssm_correct_rate', 'sqrt', 'pow', 'exp', 'log'])

         self.remainder = sorted([x['remainder']['name'] for x in self.model['populations'] if 'remainder' in x])
         self.ur = ['U'] + self.remainder
@@ -66,7 +66,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs):
                 p['require']['name'] = p['name']

             #semantic to SSM transfo
-            prior = { 'distribution': resource['name'] if resource['name'] != 'dirac' else 'fixed' }
+            prior = { 'distribution': resource['name'] if resource['name'] != 'dirac' else 'fixed' }
             for x in resource['distributionParameter']:
                 prior[x['name'] if 'name' in x else 'value'] = x['value']

@@ -125,7 +125,17 @@ def __init__(self, dpkgRoot, dpkg, **kwargs):
         #par_obs
         par_obs = set();
         for o in observations:
-            for p in [o['mean'], o['sd']]:
+            if o['distribution'] == 'discretized_normal':
+                pars = [o['mean'], o['sd']]
+            elif o['distribution'] == 'poisson':
+                o['sd'] = 'sqrt('+o['mean']+')'
+                pars = [o['mean'], o['sd']]
+            elif o['distribution'] == 'binomial':
+                o['mean'] = o['p']+'*'+o['n']
+                o['sd'] = 'sqrt('+o['p']+'*(1-'+o['p']+')*'+o['n']+')'
+                pars = [o['n'], o['p'], o['mean'], o['sd']]
+
+            for p in pars:
                 el = self.change_user_input(p)
                 for e in el:
                     if e not in self.op and e not in self.reserved and e not in self.special_functions and e not in self.par_sv and e not in self.par_noise and e not in self.par_proc and e not in self.par_forced and e not in self.par_inc:
@@ -136,7 +146,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs):

         self.par_obs = sorted(list(par_obs))

-        ##par_disp (parameter involve **only** in dispertion (nowhere else)
+        ##par_disp (parameters involved **only** in dispersion (nowhere else))
         disp = [x for subl in sde['dispersion'] for x in subl if x != 0] if 'dispersion' in sde else []
         par_disp = set()
         for x in disp:
@@ -150,13 +160,14 @@ def __init__(self, dpkgRoot, dpkg, **kwargs):

         self.par_disp = sorted(list(par_disp))

-        #par_diff (state variable for diffusions)
+        ##par_diff (state variable for diffusions)
         par_diff = []
         if sde:
             for x in sde.get('drift', []):
                 par_diff.append(x['name'])

-        self.par_diff = ['diff__' + x for x in sorted(par_diff)]
+        ##NOTE: do not sort par_diff as the dispersion matrix is read in the unsorted order
+        self.par_diff = ['diff__' + x for x in par_diff]

         ##par_other
         par_ssm = self.par_sv + self.par_inc + self.remainder + self.par_diff + self.par_noise + self.par_proc + self.par_obs + self.par_forced + self.par_disp
@@ -179,7 +190,7 @@ def __init__(self, dpkgRoot, dpkg, **kwargs):
                 self.map_name2prior_name[p['name']] = p['require']['name']
             else:
                 self.map_name2prior_name[p['name']] = p['name']
-
+
         # proc_model
         self.proc_model = copy.deepcopy(reactions)

@@ -260,12 +271,12 @@ def pow2star(self, term):
         if not start and not terms[0] == 'pow':
             return term

-        pos = 1 #counter for open parenthesis
+        pos = 1 #counter for open parenthesis
         ind = start+2 #skip first parenthesis
         lhs = ''
         rhs = ''
         left = True
-        while (ind < len(terms)):
+        while (ind < len(terms)):
             if terms[ind] == '(':
                 pos += 1
             if terms[ind] == ')':
@@ -337,7 +348,7 @@ def generator_C(self, term, no_correct_rate, force_par=False, xify=None, human=F
                 ind += 2 #skip first parenthesis
                 stack.append({"f": myf, "pos": 1}) #pos: counter for open parenthesis
             else:
-                if stack:
+                if stack:
                     if terms[ind] == '(':
                         stack[-1]['pos'] += 1
                     Cterm += '('
@@ -353,7 +364,7 @@ def generator_C(self, term, no_correct_rate, force_par=False, xify=None, human=F
                     Cterm += ')'
                 else:
                     Cterm += ')'
-
+
             else:
                 Cterm += ')'
diff --git a/src/Data.py b/src/Data.py
index 460529c..1832986 100644
--- a/src/Data.py
+++ b/src/Data.py
@@ -107,7 +107,8 @@ def prepare_data(self):
         dates.sort()

         data_C = []
-        for d in dates:
+
+        for i, d in enumerate(dates):
             pd = parse_date(d)
             row = {
                 'date': pd.isoformat(),
@@ -117,9 +118,13 @@ def prepare_data(self):
                 'time': (pd-self.t0).days
             }

+            if i > 0:
+                for x in obs_id:
+                    if dates[i-1] in data[x]['dict']:
+                        row['reset'].extend(data[x]['ind_inc_reset'])
+
             for x in obs_id:
-                if d in data[x]['dict']:
-                    row['reset'].extend(data[x]['ind_inc_reset'])
+                if d in data[x]['dict']:
                     if data[x]['dict'][d] is not None:
                         row['observed'].append(data[x]['order'])
                         row['values'].append(data[x]['f'](data[x]['dict'][d]))