diff --git a/Makefile b/Makefile index 5ae38a0..264d3a9 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ QA_TILES ?= planet DATA_TILES ?= mbtiles://./data/osm/$(QA_TILES).mbtiles BBOX ?= '-180,-85,180,85' IMAGE_TILES ?= "tilejson+https://a.tiles.mapbox.com/v4/mapbox.satellite.json?access_token=$(MapboxAccessToken)" -TRAIN_SIZE ?= 5000 +TRAIN_SIZE ?= 1000 CLASSES ?= classes/roads.json LABEL_RATIO ?= 0 ZOOM_LEVEL ?= 17 @@ -22,41 +22,39 @@ data/osm/%.mbtiles: mkdir -p $(dir $@) curl https://s3.amazonaws.com/mapbox/osm-qa-tiles/latest.country/$(notdir $@).gz | gunzip > $@ - -# Make a list of all the tiles within BBOX +# Make a list of all the tiles within BBOX at ZOOM_LEVEL data/all_tiles.txt: if [[ $(DATA_TILES) == mbtiles* ]] ; then \ - tippecanoe-enumerate $(subst mbtiles://./,,$(DATA_TILES)) | node lib/read-sample.js --bbox='$(BBOX)' > $@ ; \ + node lib/cover.js --bbox='$(BBOX)' --zoom='$(ZOOM_LEVEL)' --mbtiles=$(subst mbtiles://./,,$(DATA_TILES)) > $@ ; \ else echo "$(DATA_TILES) is not an mbtiles source: you will need to create data/all_tiles.txt manually." 
&& exit 1 ; \ fi -# Make a random sample from all_tiles.txt of TRAIN_SIZE tiles, possibly -# 'overzooming' them to zoom=ZOOM_LEVEL -data/sample.txt: data/all_tiles.txt - ./sample $^ $(TRAIN_SIZE) $(ZOOM_LEVEL) > $@ - # Rasterize the data tiles to bitmaps where each pixel is colored according to # the class defined in CLASSES # (no class / background => black) -data/labels/color: data/sample.txt +data/labels/color: data/all_tiles.txt mkdir -p $@ cp $(CLASSES) data/classes.json - cat data/sample.txt | \ + cat data/all_tiles.txt | \ parallel --pipe --block 10K './rasterize-labels $(DATA_TILES) $(CLASSES) $@ $(LABEL_RATIO)' -data/labels/label-counts.txt: data/labels/color data/sample.txt +data/labels/label-counts.txt: data/labels/color data/all_tiles.txt #If LABEL_RATIO != 0, this will drop references for images which aren't found - cat data/sample.txt | \ + cat data/all_tiles.txt | \ parallel --pipe --block 10K --group './label-counts $(CLASSES) data/labels/color' > $@ # Also generate label-stats.csv cat data/labels/label-counts.txt | ./label-stats > data/labels/label-stats.csv -# Once we've generated label bitmaps, we can make a version of the original sample +# Once we've generated label bitmaps, we can make a version of the original tile list # filtered to tiles with the ratio (pixels with non-background label)/(total pixels) # above the LABEL_RATIO threshold -data/sample-filtered.txt: data/labels/label-counts.txt +data/filtered.txt: data/labels/label-counts.txt cat $^ | node lib/read-sample.js --label-ratio $(LABEL_RATIO) > $@ +# Make a random sample from data/filtered.txt of TRAIN_SIZE tiles +data/sample-filtered.txt: data/filtered.txt + ./sample $^ $(TRAIN_SIZE) > $@ + data/labels/grayscale: data/sample-filtered.txt mkdir -p $@ cat $^ | \ diff --git a/README.md b/README.md index 1afc834..66492be 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,11 @@ variables you want to set. 
Then run: docker-compose build ``` -to build your local docker image, and +to build your local docker image, and ``` docker-compose run data download-osm-tiles -docker-compose run data +docker-compose run data ``` to download the OSM QA tiles, and run the data collection as specified @@ -102,21 +102,12 @@ a full training set using the instructions above. ### Install - Install [NodeJS v4.6.2](https://nodejs.org/dist/v4.6.2/) - - Install [tippecanoe](https://github.com/mapbox/tippecanoe) - Install [GNU Parallel](https://www.gnu.org/software/parallel/) - Install [shuf](https://www.gnu.org/software/coreutils/) - Clone this repo and run `npm install`. (Note that this includes a node-mapnik install, which sometimes has trouble building in bleeding-edge versions of node.) -### Sample available tiles - -`make data/sample.txt` - -This just does a simple random sample of the available tiles in the given -`mbtiles` set, using `tippecanoe-enumerate`. For more intelligent filtering, -consider using `tippecanoe-decode` to examine (geojson) contents of each tile. - ### Labels Build label images: `make data/labels/color` or `make data/labels/grayscale`. 
diff --git a/download-images b/download-images index cf543f1..8246db6 100755 --- a/download-images +++ b/download-images @@ -1,6 +1,11 @@ #!/usr/bin/env node +var opts = { + require: ['tilelive-mapnik', 'tilelive-raster', 'tilelive-vector'] +} + var tilelive = require('tilelive') +require("tilelive-modules/loader")(tilelive, opts); var queue = require('queue-async') var readSample = require('./lib/read-sample') var writeTile = require('./lib/write-tile') @@ -27,4 +32,3 @@ tilelive.load(input, function (err, source) { }) }) }) - diff --git a/lib/cover.js b/lib/cover.js new file mode 100755 index 0000000..e9fbb8e --- /dev/null +++ b/lib/cover.js @@ -0,0 +1,18 @@ +#!/usr/bin/env node + +const cover = require('@mapbox/tile-cover') +const bb = require('turf-bbox-polygon') +const argv = require('minimist')(process.argv.slice(2)) + +const bbox = argv.bbox +const zoom = argv.zoom +const mbtiles = argv.mbtiles + +const geo = bb(bbox.split(',').map(b => +b)) + +cover.tiles(geo.geometry, { + min_zoom: parseInt(zoom, 10), + max_zoom: parseInt(zoom, 10) +}).forEach(function (tile) { + console.log([mbtiles, tile[2], tile[0], tile[1]].join(' ')) +}) diff --git a/package.json b/package.json index 55c92c2..21f4358 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,10 @@ "author": "Anand Thakker (http://anandthakker.net/)", "license": "ISC", "dependencies": { - "@mapbox/tile-reduce": "^3.1.1", + "@mapbox/tile-cover": "^3.0.2", + "@mapbox/tile-reduce": "3.1.1", + "@mapbox/tilelive-mapnik": "^1.0.0", + "@mapbox/tilelive-vector": "^4.0.0", "@turf/bbox": "^3.7.0", "@turf/inside": "^3.7.0", "geojson-stream": "0.0.1", @@ -28,7 +31,8 @@ "tilebelt": "^1.0.1", "tilejson": "^1.0.1", "tilelive": "^5.12.2", - "tilelive-vector": "^3.9.2", + "tilelive-modules": "^0.4.0", + "tilelive-raster": "^0.5.0", "turf-bbox-polygon": "^3.0.12" }, "devDependencies": { diff --git a/rasterize-labels b/rasterize-labels index 4894e4f..96aa058 100755 --- a/rasterize-labels +++ b/rasterize-labels @@ -1,8 
+1,13 @@ #!/usr/bin/env node +var opts = { + require: ['tilelive-mapnik', 'tilelive-raster', 'tilelive-vector'] +} + var path = require('path') -var Vector = require('tilelive-vector') +var Vector = require('@mapbox/tilelive-vector') var tilelive = require('tilelive') +require("tilelive-modules/loader")(tilelive, opts); require('mbtiles').registerProtocols(tilelive) var queue = require('queue-async') var argv = require('minimist')(process.argv.slice(2)) diff --git a/sample b/sample index 2b8a3c2..3fe44fe 100755 --- a/sample +++ b/sample @@ -3,11 +3,9 @@ var fs = require('fs') var exec = require('child_process').exec var split = require('split') -var tilebelt = require('tilebelt') var tiles = process.argv[2] var trainSize = process.argv[3] -var zoomLevel = (process.argv[4] || '').trim() var sample var i = -1 @@ -17,11 +15,8 @@ countLines(tiles, function (err, total) { fs.createReadStream(tiles) .pipe(split()) .on('data', function (line) { + if (!line) return if (!sample) { - if (zoomLevel) { - var realZoom = parseInt(line.split(' ')[1], 10) - total = total * Math.pow(4, zoomLevel - realZoom) - } sample = [] while (sample.length < trainSize && sample.length < total) { var l = Math.floor(Math.random() * total) @@ -30,43 +25,13 @@ countLines(tiles, function (err, total) { sample.sort((a, b) => (a - b)) } - if (zoomLevel) { - line = line.split(' ') - var tile = [line[2], line[3], line[1]].map(Number) // zxy -> xyz - var tiles = getDescendants(tile, zoomLevel) - tiles.forEach(function (tile) { - if (sample[0] === i++) { - console.log([line[0], tile[2], tile[0], tile[1]].join(' ')) // xyz -> file z x y - sample.shift() - } - }) - } else { - if (sample[0] === i++) { - console.log(line) - sample.shift() - } + if (sample[0] === i++) { + console.log(line) + sample.shift() } }) }) -function getDescendants (tile, zoom) { - var z = tile[2] - var tiles = [tile] - while (z < zoom) { - var c = 0 - var nextTiles = new Array(tiles.length * 4) - for (var i = 0; i < tiles.length; i++) 
{ - var children = tilebelt.getChildren(tiles[i]) - for (var j = 0; j < 4; j++) { - nextTiles[c++] = children[j] - } - } - tiles = nextTiles - z++ - } - return tiles -} - function countLines (file, cb) { exec('wc -l ' + file, function (err, result) { if (err) { return cb(err) }