77
88 "github.com/application-research/autoretrieve/blocks"
99 "github.com/application-research/autoretrieve/metrics"
10+ "github.com/dustin/go-humanize"
1011 lassieretriever "github.com/filecoin-project/lassie/pkg/retriever"
12+ "github.com/filecoin-project/lassie/pkg/types"
1113 "github.com/ipfs/go-bitswap/message"
1214 bitswap_message_pb "github.com/ipfs/go-bitswap/message/pb"
1315 "github.com/ipfs/go-bitswap/network"
@@ -133,15 +135,15 @@ func NewProvider(
133135 provider .network .Start (provider )
134136
135137 for i := 0 ; i < int (config .RequestWorkers ); i ++ {
136- go provider .handleRequests ()
138+ go provider .handleRequests (ctx )
137139 }
138140
139141 for i := 0 ; i < int (config .ResponseWorkers ); i ++ {
140- go provider .handleResponses ()
142+ go provider .handleResponses (ctx )
141143 }
142144
143145 for i := 0 ; i < int (config .RetrievalWorkers ); i ++ {
144- go provider .handleRetrievals ()
146+ go provider .handleRetrievals (ctx )
145147 }
146148
147149 return provider , nil
@@ -160,10 +162,8 @@ func (provider *Provider) ReceiveMessage(ctx context.Context, sender peer.ID, in
160162 provider .requestQueue .PushTasks (sender , tasks ... )
161163}
162164
163- func (provider * Provider ) handleRequests () {
164- ctx := context .Background ()
165-
166- for {
165+ func (provider * Provider ) handleRequests (ctx context.Context ) {
166+ for ctx .Err () == nil {
167167 peerID , tasks , _ := provider .requestQueue .PopTasks (100 )
168168 if len (tasks ) == 0 {
169169 time .Sleep (time .Millisecond * 250 )
@@ -256,10 +256,8 @@ func (provider *Provider) handleRequest(
256256 return nil
257257}
258258
259- func (provider * Provider ) handleResponses () {
260- ctx := context .Background ()
261-
262- for {
259+ func (provider * Provider ) handleResponses (ctx context.Context ) {
260+ for ctx .Err () == nil {
263261 peerID , tasks , _ := provider .responseQueue .PopTasks (targetMessageSize )
264262 if len (tasks ) == 0 {
265263 time .Sleep (time .Millisecond * 250 )
@@ -291,15 +289,15 @@ func (provider *Provider) handleResponses() {
291289 log .Debugf ("Sending have for %s" , cid )
292290
293291 // Response metric
294- ctx , _ = tag .New (ctx , tag .Insert (metrics .BitswapTopic , "HAVE" ))
295- stats .Record (ctx , metrics .BitswapResponseCount .M (1 ))
292+ taggedCtx , _ : = tag .New (ctx , tag .Insert (metrics .BitswapTopic , "HAVE" ))
293+ stats .Record (taggedCtx , metrics .BitswapResponseCount .M (1 ))
296294 case actionSendDontHave :
297295 msg .AddDontHave (cid )
298296 log .Debugf ("Sending dont have for %s" , cid )
299297
300298 // Response metric
301- ctx , _ = tag .New (ctx , tag .Insert (metrics .BitswapTopic , "DONT_HAVE" ), tag .Insert (metrics .BitswapDontHaveReason , data .reason ))
302- stats .Record (ctx , metrics .BitswapResponseCount .M (1 ))
299+ taggedCtx , _ : = tag .New (ctx , tag .Insert (metrics .BitswapTopic , "DONT_HAVE" ), tag .Insert (metrics .BitswapDontHaveReason , data .reason ))
300+ stats .Record (taggedCtx , metrics .BitswapResponseCount .M (1 ))
303301 case actionSendBlock :
304302 block , err := provider .blockManager .Get (ctx , cid )
305303 if err != nil {
@@ -310,8 +308,8 @@ func (provider *Provider) handleResponses() {
310308 log .Debugf ("Sending block for %s" , cid )
311309
312310 // Response metric
313- ctx , _ = tag .New (ctx , tag .Insert (metrics .BitswapTopic , "BLOCK" ))
314- stats .Record (ctx , metrics .BitswapResponseCount .M (1 ))
311+ taggedCtx , _ : = tag .New (ctx , tag .Insert (metrics .BitswapTopic , "BLOCK" ))
312+ stats .Record (taggedCtx , metrics .BitswapResponseCount .M (1 ))
315313 }
316314 }
317315
@@ -325,10 +323,8 @@ func (provider *Provider) handleResponses() {
325323 }
326324}
327325
328- func (provider * Provider ) handleRetrievals () {
329- ctx := context .Background ()
330-
331- for {
326+ func (provider * Provider ) handleRetrievals (ctx context.Context ) {
327+ for ctx .Err () == nil {
332328 peerID , tasks , _ := provider .retrievalQueue .PopTasks (1 )
333329 if len (tasks ) == 0 {
334330 time .Sleep (time .Millisecond * 250 )
@@ -344,38 +340,52 @@ func (provider *Provider) handleRetrievals() {
344340 continue
345341 }
346342
347- log .Debugf ("Requesting retrieval for %s" , cid )
343+ retrievalId , err := types .NewRetrievalID ()
344+ if err != nil {
345+ log .Errorf ("Failed to create retrieval ID: %s" , err .Error ())
346+ }
347+
348+ log .Debugf ("Starting retrieval for %s (%s)" , cid , retrievalId )
349+
350+ // Start a background blockstore fetch with a callback to send the block
351+ // to the peer once it's available.
352+ blockCtx , blockCancel := context .WithCancel (ctx )
353+ if provider .blockManager .AwaitBlock (blockCtx , cid , func (block blocks.Block , err error ) {
354+ if err != nil {
355+ log .Debugf ("Async block load failed: %s" , err )
356+ provider .queueSendDontHave (peerID , task .Priority , cid , "failed_block_load" )
357+ } else {
358+ log .Debugf ("Async block load completed: %s" , cid )
359+ provider .queueSendBlock (peerID , task .Priority , cid , block .Size )
360+ }
361+ blockCancel ()
362+ }) {
363+ // If the block was already in the blockstore then we don't need to
364+ // start a retrieval.
365+ continue
366+ }
348367
349368 // Try to start a new retrieval (if it's already running then no
350369 // need to error, just continue on to await block)
351- if err := provider .retriever .Request (cid ); err != nil {
352- if ! errors .As (err , & lassieretriever.ErrRetrievalAlreadyRunning {}) {
353- if errors .Is (err , lassieretriever .ErrNoCandidates ) {
354- // Just do a debug print if there were no candidates because this happens a lot
355- log .Debugf ("No candidates for %s" , cid )
356- } else {
357- // Otherwise, there was a real failure, print with more importance
358- log .Errorf ("Request for %s failed: %v" , cid , err )
359- }
360- } else {
370+ result , err := provider .retriever .Retrieve (ctx , retrievalId , cid )
371+ if err != nil {
372+ if errors .Is (err , lassieretriever .ErrRetrievalAlreadyRunning ) {
361373 log .Debugf ("Retrieval already running for %s, no new one will be started" , cid )
374+ continue // Don't send dont_have or run blockCancel(), let it async load
375+ } else if errors .Is (err , lassieretriever .ErrNoCandidates ) {
376+ // Just do a debug print if there were no candidates because this happens a lot
377+ log .Debugf ("No candidates for %s (%s)" , cid , retrievalId )
378+ provider .queueSendDontHave (peerID , task .Priority , cid , "no_candidates" )
379+ } else {
380+ // Otherwise, there was a real failure, print with more importance
381+ log .Errorf ("Retrieval for %s (%s) failed: %v" , cid , retrievalId , err )
382+ provider .queueSendDontHave (peerID , task .Priority , cid , "retrieval_failed" )
362383 }
363384 } else {
364- log .Infof ("Started retrieval for %s" , cid )
385+ log .Infof ("Retrieval for %s (%s) completed (duration: %s, bytes: %s, blocks: %d) " , cid , retrievalId , result . Duration , humanize . IBytes ( result . Size ), result . Blocks )
365386 }
366387
367- // TODO: if retriever.Request() is changed to be blocking, make
368- // blockManager.AwaitBlock() cancellable and cancel it after the
369- // request finishes if there's an error
370- provider .blockManager .AwaitBlock (ctx , cid , func (block blocks.Block , err error ) {
371- if err != nil {
372- log .Debugf ("Async block load failed: %s" , err )
373- provider .queueSendDontHave (peerID , task .Priority , block .Cid , "failed_block_load" )
374- } else {
375- log .Debugf ("Async block load completed: %s" , block .Cid )
376- provider .queueSendBlock (peerID , task .Priority , block .Cid , block .Size )
377- }
378- })
388+ blockCancel ()
379389 }
380390
381391 provider .retrievalQueue .TasksDone (peerID , tasks ... )
0 commit comments