@@ -8,23 +8,33 @@ Code related to permanently deleting projects.
88*/ 
99
1010import  getLogger  from  "@cocalc/backend/logger" ; 
11+ import  {  newCounter  }  from  "@cocalc/backend/metrics" ; 
1112import  getPool  from  "@cocalc/database/pool" ; 
1213import  {  getServerSettings  }  from  "@cocalc/database/settings" ; 
1314import  {  callback2  }  from  "@cocalc/util/async-utils" ; 
1415import  {  KUCALC_ON_PREMISES  }  from  "@cocalc/util/db-schema/site-defaults" ; 
1516import  {  minutes_ago  }  from  "@cocalc/util/misc" ; 
16- import  {  bulk_delete  }  from  "./bulk-delete" ; 
17+ import  {  bulkDelete  }  from  "./bulk-delete" ; 
1718import  {  PostgreSQL  }  from  "./types" ; 
1819
// Logger for this module, registered under the "db:delete-projects" name.
const log = getLogger("db:delete-projects");

// Counter for the deletion operations performed in this file. It carries a
// single label, "op", which the cleanup code sets to the kind of item that
// was just removed (e.g. "syncstring" or "project") before incrementing.
// NOTE(review): presumably exported as a Prometheus metric – confirm in
// @cocalc/backend/metrics.
const delete_projects_prom = newCounter(
  "database",
  "delete_projects_total",
  "Deleting projects and associated data operations counter.",
  ["op"],
);
28+ 
2129/* 
2230Permanently delete from the database all project records, where the 
2331project is explicitly deleted already (so the deleted field is true). 
2432Call this function to setup projects for permanent deletion.  This blanks 
2533the user field so the user no longer can access the project, and we don't 
2634know that the user had anything to do with the project.  A separate phase 
2735later then purges these projects from disk as well as the database. 
36+ 
37+ TODO: it's referenced from postgres-server-queries.coffee, but is it actually used anywhere? 
2838*/ 
2939export  async  function  permanently_unlink_all_deleted_projects_of_user ( 
3040  db : PostgreSQL , 
@@ -85,7 +95,7 @@ FROM projects as p
8595  INNER JOIN syncstrings as s 
8696  ON p.project_id = s.project_id 
8797WHERE p.deleted = true 
88-   AND users IS NULL 
98+   AND p. users IS NULL 
8999  AND p.state ->> 'state' != 'deleted' 
90100ORDER BY 
91101  p.project_id, s.string_id 
@@ -117,6 +127,7 @@ export async function cleanup_old_projects_data(
117127  const  {  rows }  =  await  pool . query ( Q_CLEANUP_SYNCSTRINGS ) ; 
118128
119129  let  num  =  0 ; 
130+   let  num2  =  0 ; 
120131  let  pid  =  "" ; 
121132
122133  for  ( const  row  of  rows )  { 
@@ -129,84 +140,28 @@ export async function cleanup_old_projects_data(
129140    L ( `deleting syncstring ${ project_id } ${ string_id }  ) ; 
130141    num  +=  1 ; 
131142    await  callback2 ( db . delete_syncstring ,  {  string_id } ) ; 
143+     delete_projects_prom . labels ( "syncstring" ) . inc ( ) ; 
132144
133145    // wait a bit after deleting syncstrings, e.g. to let the standby db catch up 
134-     await  new  Promise ( ( done )  =>  setTimeout ( done ,  100 ) ) ; 
146+     await  new  Promise ( ( done )  =>  setTimeout ( done ,  10 ) ) ; 
135147
136148    // Q_CLEANUP_SYNCSTRINGS orders by project_id, hence we trigger project specific actions when the id changes 
137149    if  ( pid  !=  project_id )  { 
138150      pid  =  project_id ; 
139151      const  L2  =  L0 . extend ( project_id ) . debug ; 
152+       delete_projects_prom . labels ( "project" ) . inc ( ) ; 
153+       num2  +=  1 ; 
154+       let  delRows  =  0 ; 
140155
141156      if  ( on_prem )  { 
142-         L2 ( `cleanup_old_projects_data for project_id= ${ project_id }  ) ; 
157+         L2 ( `delete all project files ` ) ; 
143158        // TODO: this only works on-prem, and requires the project files to be mounted 
144159
145-         L2 ( `deleting all shared files in project  ${ project_id }  ) ; 
160+         L2 ( `deleting all shared files` ) ; 
146161        // TODO: do it directly like above, and also get rid of all those shares in the database 
147162
148-         const  delPublicPaths  =  await  bulk_delete ( { 
149-           table : "public_paths" , 
150-           field : "project_id" , 
151-           value : project_id , 
152-         } ) ; 
153-         L2 ( `deleted public_paths ${ delPublicPaths . rowsDeleted }  ) ; 
154- 
155-         const  delProjectLog  =  await  bulk_delete ( { 
156-           table : "project_log" , 
157-           field : "project_id" , 
158-           value : project_id , 
159-         } ) ; 
160-         L2 ( `deleted project_log ${ delProjectLog . rowsDeleted }  ) ; 
161- 
162-         const  delFileUse  =  await  bulk_delete ( { 
163-           table : "file_use" , 
164-           field : "project_id" , 
165-           value : project_id , 
166-         } ) ; 
167-         L2 ( `deleted file_use ${ delFileUse . rowsDeleted }  ) ; 
168- 
169-         const  delAccessLog  =  await  bulk_delete ( { 
170-           table : "file_access_log" , 
171-           field : "project_id" , 
172-           value : project_id , 
173-         } ) ; 
174-         L2 ( `deleted file_access_log ${ delAccessLog . rowsDeleted }  ) ; 
175- 
176-         const  delJupyterApiLog  =  await  bulk_delete ( { 
177-           table : "jupyter_api_log" , 
178-           field : "project_id" , 
179-           value : project_id , 
180-         } ) ; 
181-         L2 ( `deleted jupyter_api_log ${ delJupyterApiLog . rowsDeleted }  ) ; 
182- 
183-         for  ( const  field  of  [ 
184-           "target_project_id" , 
185-           "source_project_id" , 
186-         ]  as  const )  { 
187-           const  delCopyPaths  =  await  bulk_delete ( { 
188-             table : "copy_paths" , 
189-             field, 
190-             value : project_id , 
191-           } ) ; 
192-           L2 ( `deleted copy_paths/${ field } ${ delCopyPaths . rowsDeleted }  ) ; 
193-         } 
194- 
195-         const  delListings  =  await  bulk_delete ( { 
196-           table : "listings" , 
197-           field : "project_id" , 
198-           id : "project_id" ,  // TODO listings has a more complex ID, is this a problem? 
199-           value : project_id , 
200-         } ) ; 
201-         L2 ( `deleted ${ delListings . rowsDeleted }  ) ; 
202- 
203-         const  delInviteTokens  =  await  bulk_delete ( { 
204-           table : "project_invite_tokens" , 
205-           field : "project_id" , 
206-           value : project_id , 
207-           id : "token" , 
208-         } ) ; 
209-         L2 ( `deleted ${ delInviteTokens . rowsDeleted }  ) ; 
163+         // for now, on-prem only as well. This gets rid of all sorts of data in tables specific to the given project. 
164+         delRows  +=  await  delete_associated_project_data ( L2 ,  project_id ) ; 
210165      } 
211166
212167      // now, that we're done with that project, mark it as state.state ->> 'deleted' 
@@ -215,6 +170,73 @@ export async function cleanup_old_projects_data(
215170        project_id, 
216171        state : "deleted" , 
217172      } ) ; 
173+       L2 ( 
174+         `finished deleting project data | deleted ${ delRows }  , 
175+       ) ; 
218176    } 
219177  } 
178+   L ( `finished deleting ${ num } ${ num2 }  ) ; 
179+ } 
180+ 
/*
Delete the rows that belong to the given project from a fixed set of
project-specific tables, one bulkDelete call per table (two for copy_paths,
which references a project in two different columns).

- L2: logging function; called once after each bulkDelete with a message
  stating what was deleted and how many rows.
- project_id: the value matched against the project column of each table.

Returns the sum of rowsDeleted over all bulkDelete calls.
*/
async function delete_associated_project_data(
  L2,
  project_id: string,
): Promise<number> {
  let total = 0;

  // collecting tables, where the primary key is the default (i.e. "id") and
  // the field to check is always called "project_id"
  const tables = [
    "public_paths",
    "project_log",
    "file_use",
    "file_access_log",
    "jupyter_api_log",
    "openai_chatgpt_log",
  ] as const;

  for (const table of tables) {
    const { rowsDeleted } = await bulkDelete({
      table,
      field: "project_id",
      value: project_id,
    });
    total += rowsDeleted;
    L2(`deleted ${table} ${rowsDeleted} entries`);
  }

  // these tables are different, i.e. another id, or the field to check the
  // project_id value against is called differently

  // copy_paths: the project can show up as the source or as the target
  for (const field of ["target_project_id", "source_project_id"] as const) {
    const { rowsDeleted } = await bulkDelete({
      table: "copy_paths",
      field,
      value: project_id,
    });
    total += rowsDeleted;
    L2(`deleted copy_paths/${field} ${rowsDeleted} entries`);
  }

  {
    const { rowsDeleted } = await bulkDelete({
      table: "listings",
      field: "project_id",
      id: "project_id", // TODO listings has a more complex ID, is this a problem?
      value: project_id,
    });
    total += rowsDeleted;
    L2(`deleted ${rowsDeleted} listings`);
  }

  {
    // project_invite_tokens uses "token" as its id column
    const { rowsDeleted } = await bulkDelete({
      table: "project_invite_tokens",
      field: "project_id",
      value: project_id,
      id: "token",
    });
    total += rowsDeleted;
    L2(`deleted ${rowsDeleted} project_invite_tokens`);
  }

  return total;
}
0 commit comments