@@ -9,11 +9,12 @@ Code related to permanently deleting projects.
99
1010import  getLogger  from  "@cocalc/backend/logger" ; 
1111import  getPool  from  "@cocalc/database/pool" ; 
12- import  {  callback2  }  from  "@cocalc/util/async-utils" ; 
13- import  {  PostgreSQL  }  from  "./types" ; 
14- import  {  minutes_ago  }  from  "@cocalc/util/misc" ; 
1512import  {  getServerSettings  }  from  "@cocalc/database/settings" ; 
13+ import  {  callback2  }  from  "@cocalc/util/async-utils" ; 
1614import  {  KUCALC_ON_PREMISES  }  from  "@cocalc/util/db-schema/site-defaults" ; 
15+ import  {  minutes_ago  }  from  "@cocalc/util/misc" ; 
16+ import  {  bulk_delete  }  from  "./bulk-delete" ; 
17+ import  {  PostgreSQL  }  from  "./types" ; 
1718
1819const  log  =  getLogger ( "db:delete-projects" ) ; 
1920
@@ -59,8 +60,9 @@ async function get_account_id(
5960} 
6061
6162/* 
62- This deletes all projects older than the given number of days, from the perspective of a user. 
63- Another task has to run to actually get rid of the data, etc. 
63+ This removes all users from all projects older than the given number of days and marked as deleted. 
64+ In particular, users are no longer able to access that project. 
65+ The "cleanup_old_projects_data" function has to run to actually get rid of the data, etc. 
6466*/ 
6567export  async  function  unlink_old_deleted_projects ( 
6668  db : PostgreSQL , 
@@ -70,7 +72,7 @@ export async function unlink_old_deleted_projects(
7072    query : "UPDATE projects" , 
7173    set : {  users : null  } , 
7274    where : [ 
73-       "deleted   = true" , 
75+       "deleted = true" , 
7476      "users IS NOT NULL" , 
7577      `last_edited <= NOW() - '${ age_d }  , 
7678    ] , 
@@ -83,27 +85,32 @@ FROM projects as p
8385  INNER JOIN syncstrings as s 
8486  ON p.project_id = s.project_id 
8587WHERE p.deleted = true 
88+   AND users IS NULL 
8689  AND p.state ->> 'state' != 'deleted' 
90+ ORDER BY 
91+   p.project_id, s.string_id 
8792` ; 
8893
8994/* 
90-  This is more thorough than the above. It issues actual delete operations on data of projects marked as deleted. 
95+  This more thorough delete procedure comes after the above. 
96+  It issues actual delete operations on data of projects marked as deleted. 
9197 When done, it sets the state.state to "deleted". 
9298
9399 The operation involves deleting all syncstrings of that project (and, along with them, their patches), 
94-  and only for on-prem setups, it also deletes all the data stored in the project on disk. 
100+  and only for on-prem setups, it also deletes all the data stored in the project on disk and the project's rows in various database tables.
95101
96-  This function is called every couple of hours. Hence ensure  it does  not run longer than the given max_run_m time (minutes) 
102+  This function is called every couple of hours. Hence it checks that it does not run longer than the given max_run_m time (minutes).
97103*/ 
98104export  async  function  cleanup_old_projects_data ( 
99105  db : PostgreSQL , 
100-   delay_ms  =  50 , 
101106  max_run_m  =  60 , 
102107)  { 
103108  const  settings  =  await  getServerSettings ( ) ; 
104109  const  on_prem  =  settings . kucalc  ===  KUCALC_ON_PREMISES ; 
110+   const  L0  =  log . extend ( "cleanup_old_projects_data" ) ; 
111+   const  L  =  L0 . debug ; 
105112
106-   log . debug ( "cleanup_old_projects_data" ,  {  delay_ms ,   max_run_m,  on_prem } ) ; 
113+   log . debug ( "cleanup_old_projects_data" ,  {  max_run_m,  on_prem } ) ; 
107114  const  start_ts  =  new  Date ( ) ; 
108115
109116  const  pool  =  getPool ( ) ; 
@@ -115,34 +122,95 @@ export async function cleanup_old_projects_data(
115122  for  ( const  row  of  rows )  { 
116123    const  {  project_id,  string_id }  =  row ; 
117124    if  ( start_ts  <  minutes_ago ( max_run_m ) )  { 
118-       log . debug ( 
119-         `cleanup_old_projects_data: too much time elapsed, breaking after ${ num }  , 
120-       ) ; 
125+       L ( `too much time elapsed, breaking after ${ num }  ) ; 
121126      break ; 
122127    } 
123128
124-     log . debug ( 
125-       `cleanup_old_projects_data: deleting syncstring ${ project_id } ${ string_id }  , 
126-     ) ; 
129+     L ( `deleting syncstring ${ project_id } ${ string_id }  ) ; 
127130    num  +=  1 ; 
128131    await  callback2 ( db . delete_syncstring ,  {  string_id } ) ; 
129132
130-     // wait for the given amount of delay_ms millio seconds  
131-     await  new  Promise ( ( done )  =>  setTimeout ( done ,  delay_ms ) ) ; 
133+     // wait a bit after deleting syncstrings, e.g. to let the standby db catch up  
134+     await  new  Promise ( ( done )  =>  setTimeout ( done ,  100 ) ) ; 
132135
136+     // Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project specific actions when the id changes 
133137    if  ( pid  !=  project_id )  { 
134138      pid  =  project_id ; 
139+       const  L2  =  L0 . extend ( project_id ) . debug ; 
140+ 
135141      if  ( on_prem )  { 
136-         log . debug ( 
137-           `cleanup_old_projects_data: deleting project data in ${ project_id }  , 
138-         ) ; 
142+         L2 ( `cleanup_old_projects_data for project_id=${ project_id }  ) ; 
139143        // TODO: this only works on-prem, and requires the project files to be mounted 
140144
141-         log . debug ( `deleting all shared files in project ${ project_id }  ) ; 
145+         L2 ( `deleting all shared files in project ${ project_id }  ) ; 
142146        // TODO: do it directly like above, and also get rid of all those shares in the database 
147+ 
148+         const  delPublicPaths  =  await  bulk_delete ( { 
149+           table : "public_paths" , 
150+           field : "project_id" , 
151+           value : project_id , 
152+         } ) ; 
153+         L2 ( `deleted public_paths ${ delPublicPaths . rowsDeleted }  ) ; 
154+ 
155+         const  delProjectLog  =  await  bulk_delete ( { 
156+           table : "project_log" , 
157+           field : "project_id" , 
158+           value : project_id , 
159+         } ) ; 
160+         L2 ( `deleted project_log ${ delProjectLog . rowsDeleted }  ) ; 
161+ 
162+         const  delFileUse  =  await  bulk_delete ( { 
163+           table : "file_use" , 
164+           field : "project_id" , 
165+           value : project_id , 
166+         } ) ; 
167+         L2 ( `deleted file_use ${ delFileUse . rowsDeleted }  ) ; 
168+ 
169+         const  delAccessLog  =  await  bulk_delete ( { 
170+           table : "file_access_log" , 
171+           field : "project_id" , 
172+           value : project_id , 
173+         } ) ; 
174+         L2 ( `deleted file_access_log ${ delAccessLog . rowsDeleted }  ) ; 
175+ 
176+         const  delJupyterApiLog  =  await  bulk_delete ( { 
177+           table : "jupyter_api_log" , 
178+           field : "project_id" , 
179+           value : project_id , 
180+         } ) ; 
181+         L2 ( `deleted jupyter_api_log ${ delJupyterApiLog . rowsDeleted }  ) ; 
182+ 
183+         for  ( const  field  of  [ 
184+           "target_project_id" , 
185+           "source_project_id" , 
186+         ]  as  const )  { 
187+           const  delCopyPaths  =  await  bulk_delete ( { 
188+             table : "copy_paths" , 
189+             field, 
190+             value : project_id , 
191+           } ) ; 
192+           L2 ( `deleted copy_paths/${ field } ${ delCopyPaths . rowsDeleted }  ) ; 
193+         } 
194+ 
195+         const  delListings  =  await  bulk_delete ( { 
196+           table : "listings" , 
197+           field : "project_id" , 
198+           id : "project_id" ,  // TODO listings has a more complex ID, is this a problem? 
199+           value : project_id , 
200+         } ) ; 
201+         L2 ( `deleted ${ delListings . rowsDeleted }  ) ; 
202+ 
203+         const  delInviteTokens  =  await  bulk_delete ( { 
204+           table : "project_invite_tokens" , 
205+           field : "project_id" , 
206+           value : project_id , 
207+           id : "token" , 
208+         } ) ; 
209+         L2 ( `deleted ${ delInviteTokens . rowsDeleted }  ) ; 
143210      } 
144211
145212      // now, that we're done with that project, mark it as state.state ->> 'deleted' 
213+       // in addition to the flag "deleted = true" 
146214      await  callback2 ( db . set_project_state ,  { 
147215        project_id, 
148216        state : "deleted" , 
0 commit comments