package filedata

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"time"

	"github.com/ente-io/museum/ente/filedata"
	fileDataRepo "github.com/ente-io/museum/pkg/repo/filedata"
	enteTime "github.com/ente-io/museum/pkg/utils/time"
	log "github.com/sirupsen/logrus"
)

// StartDataDeletion clears associated file data from the object store
func (c *Controller) StartDataDeletion() {
	go c.startDeleteWorkers(1)
}
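
// startDeleteWorkers spawns n goroutines, each of which keeps picking up file
// data rows that have been marked for deletion and deletes them.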
func (c *Controller) startDeleteWorkers(n int) {
	log.Infof("Starting %d delete workers for fileData", n)
	for i := 0; i < n; i++ {
		go c.delete(i)
		// Stagger the workers
		time.Sleep(time.Duration(2*i+1) * time.Minute)
	}
}

// Entry point for the delete worker (goroutine)
//
// i is an arbitrary index of the current routine.
func (c *Controller) delete(i int) {
	// This is just
	//
	//	while (true) { delete() }
	//
	// but with an extra sleep if nothing got deleted - both when something's
	// wrong, and when there's nothing to do.
	for {
		err := c.tryDelete()
		if err != nil {
			// Sleep in proportion to the (arbitrary) index to space out the
			// workers further.
			time.Sleep(time.Duration(i+1) * time.Minute)
		}
	}
}
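
// tryDelete picks up a single row that is pending deletion, extending its lock
// (by ~10 minutes) so that other workers skip it, and deletes the associated
// file data. It returns sql.ErrNoRows when there is nothing to delete.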
func (c *Controller) tryDelete() error {
	newLockTime := enteTime.MicrosecondsAfterMinutes(10)
	row, err := c.Repo.GetPendingSyncDataAndExtendLock(context.Background(), newLockTime, true)
	if err != nil {
		if !errors.Is(err, sql.ErrNoRows) {
			log.Errorf("Could not fetch row for deletion: %s", err)
		}
		return err
	}
	err = c.deleteFileRow(*row)
	if err != nil {
		log.Errorf("Could not delete file data: %s", err)
		return err
	}
	return nil
}
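
// deleteFileRow deletes all objects for the given row from each bucket it is
// present in (buckets marked for deletion, replicas, and in-flight replicas),
// then from the latest bucket, and finally removes the row from the database.
// The row must already be marked as deleted.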
func (c *Controller) deleteFileRow(fileDataRow filedata.Row) error {
	if !fileDataRow.IsDeleted {
		return fmt.Errorf("file %d is not marked as deleted", fileDataRow.FileID)
	}
	fileID := fileDataRow.FileID
	ownerID, err := c.FileRepo.GetOwnerID(fileID)
	if err != nil {
		return err
	}
	if fileDataRow.UserID != ownerID {
		// this should never happen
		panic(fmt.Sprintf("file %d does not belong to user %d", fileID, fileDataRow.UserID))
	}
	ctxLogger := log.WithField("file_id", fileID).WithField("type", fileDataRow.Type).WithField("user_id", fileDataRow.UserID)
	objectKeys := filedata.AllObjects(fileID, ownerID, fileDataRow.Type)
	bucketColumnMap, err := getMapOfBucketIdToColumn(fileDataRow)
	if err != nil {
		ctxLogger.WithError(err).Error("Failed to get bucketColumnMap")
		return err
	}
	// Delete objects and remove buckets
	for bucketID, columnName := range bucketColumnMap {
		for _, objectKey := range objectKeys {
			err := c.ObjectCleanupController.DeleteObjectFromDataCenter(objectKey, bucketID)
			if err != nil {
				ctxLogger.WithError(err).WithFields(log.Fields{
					"bucketID":  bucketID,
					"column":    columnName,
					"objectKey": objectKey,
				}).Error("Failed to delete object from datacenter")
				return err
			}
		}
		dbErr := c.Repo.RemoveBucket(fileDataRow, bucketID, columnName)
		if dbErr != nil {
			ctxLogger.WithError(dbErr).WithFields(log.Fields{
				"bucketID": bucketID,
				"column":   columnName,
			}).Error("Failed to remove bucket from db")
			return dbErr
		}
	}
	// Delete from the latest bucket; only once every copy is gone do we remove
	// the row from the database below.
	for _, objectKey := range objectKeys {
		err = c.ObjectCleanupController.DeleteObjectFromDataCenter(objectKey, fileDataRow.LatestBucket)
		if err != nil {
			ctxLogger.WithError(err).Error("Failed to delete object from datacenter")
			return err
		}
	}
	dbErr := c.Repo.DeleteFileData(context.Background(), fileDataRow)
	if dbErr != nil {
		ctxLogger.WithError(dbErr).Error("Failed to remove from db")
		return dbErr
	}
	return nil
}
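
// getMapOfBucketIdToColumn builds a map from bucket ID to the database column
// that tracks that bucket (deletion, replication, or in-flight replication),
// returning an error if the same bucket ID appears in more than one list.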
func getMapOfBucketIdToColumn(row filedata.Row) (map[string]string, error) {
	bucketColumnMap := make(map[string]string)
	for _, bucketID := range row.DeleteFromBuckets {
		if existingColumn, exists := bucketColumnMap[bucketID]; exists {
			return nil, fmt.Errorf("duplicate DeleteFromBuckets ID found: %s in column %s", bucketID, existingColumn)
		}
		bucketColumnMap[bucketID] = fileDataRepo.DeletionColumn
	}
	for _, bucketID := range row.ReplicatedBuckets {
		if existingColumn, exists := bucketColumnMap[bucketID]; exists {
			return nil, fmt.Errorf("duplicate ReplicatedBuckets ID found: %s in column %s", bucketID, existingColumn)
		}
		bucketColumnMap[bucketID] = fileDataRepo.ReplicationColumn
	}
	for _, bucketID := range row.InflightReplicas {
		if existingColumn, exists := bucketColumnMap[bucketID]; exists {
			return nil, fmt.Errorf("duplicate InflightReplicas ID found: %s in column %s", bucketID, existingColumn)
		}
		bucketColumnMap[bucketID] = fileDataRepo.InflightRepColumn
	}
	return bucketColumnMap, nil
}