added importer

This commit is contained in:
2024-06-01 16:43:00 -07:00
parent df4dbfdb1b
commit db57ae4447
6 changed files with 413 additions and 1 deletions

View File

@@ -0,0 +1,186 @@
package importer
import (
"context"
"encoding/json"
"io/ioutil"
"os"
"os/signal"
"syscall"
"time"
"github.com/mmcdole/gofeed"
"github.com/rs/zerolog"
"github.com/schollz/progressbar/v3"
"github.com/spf13/cobra"
"go.uber.org/fx"
"sneak.berlin/go/directory/internal/config"
"sneak.berlin/go/directory/internal/globals"
"sneak.berlin/go/directory/internal/logger"
"sneak.berlin/go/directory/internal/store"
_ "github.com/joho/godotenv/autoload"
)
type ImporterParams struct {
fx.In
Logger *logger.Logger
Globals *globals.Globals
Config *config.Config
Store *store.Store
}
type Importer struct {
startupTime time.Time
exitCode int
log *zerolog.Logger
params ImporterParams
ctx context.Context
cancelFunc context.CancelFunc
}
func New(lc fx.Lifecycle, params ImporterParams) (*Importer, error) {
i := new(Importer)
i.params = params
i.log = params.Logger.Get()
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
i.startupTime = time.Now()
go i.Run(ctx)
return nil
},
OnStop: func(ctx context.Context) error {
i.cleanShutdown(ctx)
return nil
},
})
return i, nil
}
func (i *Importer) Run(ctx context.Context) {
i.ctx, i.cancelFunc = context.WithCancel(ctx)
rootCmd := &cobra.Command{
Use: "importer",
Short: "Importer is a CLI for importing data into the directory",
}
importCmd := &cobra.Command{
Use: "import",
Short: "Import data into the directory",
}
importJSONCmd := &cobra.Command{
Use: "json [file]",
Short: "Import data from a JSON file",
Args: cobra.ExactArgs(1),
Run: func(cmd *cobra.Command, args []string) {
i.importFromJSON(args[0])
},
}
importOPMLCmd := &cobra.Command{
Use: "opml [file]",
Short: "Import data from an OPML file",
Args: cobra.ExactArgs(1),
Run: func(cmd *cobra.Command, args []string) {
i.importFromOPML(args[0])
},
}
importCmd.AddCommand(importJSONCmd, importOPMLCmd)
rootCmd.AddCommand(importCmd)
go func() {
c := make(chan os.Signal, 1)
signal.Ignore(syscall.SIGPIPE)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
sig := <-c
i.log.Info().Msgf("signal received: %+v", sig)
if i.cancelFunc != nil {
i.cancelFunc()
}
}()
if err := rootCmd.ExecuteContext(i.ctx); err != nil {
i.log.Error().Err(err).Msg("command execution failed")
i.exitCode = 1
}
<-i.ctx.Done()
i.exitCode = 0
i.cleanShutdown(ctx)
}
func (i *Importer) importFromJSON(file string) {
i.log.Info().Msgf("importing from JSON file: %s", file)
data, err := ioutil.ReadFile(file)
if err != nil {
i.log.Error().Err(err).Msg("failed to read JSON file")
return
}
var records []map[string]interface{}
if err := json.Unmarshal(data, &records); err != nil {
i.log.Error().Err(err).Msg("failed to unmarshal JSON")
return
}
totalRecords := len(records)
bar := progressbar.NewOptions(totalRecords,
progressbar.OptionSetDescription("Importing records"),
progressbar.OptionShowCount(),
progressbar.OptionShowIts(),
progressbar.OptionShowElapsedTime(),
progressbar.OptionShowRemainingTime(),
progressbar.OptionSetPredictTime(true),
)
for _, record := range records {
// Insert record into the database
// db.InsertRecord(record) // Replace with actual database insertion logic
bar.Add(1)
}
i.log.Info().Msg("JSON import completed")
}
func (i *Importer) importFromOPML(file string) {
i.log.Info().Msgf("importing from OPML file: %s", file)
data, err := ioutil.ReadFile(file)
if err != nil {
i.log.Error().Err(err).Msg("failed to read OPML file")
return
}
fp := gofeed.NewParser()
feed, err := fp.ParseString(string(data))
if err != nil {
i.log.Error().Err(err).Msg("failed to parse OPML")
return
}
totalOutlines := len(feed.Items)
bar := progressbar.NewOptions(totalOutlines,
progressbar.OptionSetDescription("Importing outlines"),
progressbar.OptionShowCount(),
progressbar.OptionShowIts(),
progressbar.OptionShowElapsedTime(),
progressbar.OptionShowRemainingTime(),
progressbar.OptionSetPredictTime(true),
)
for _, outline := range feed.Items {
// Insert outline into the database
// db.InsertOutline(outline) // Replace with actual database insertion logic
bar.Add(1)
}
i.log.Info().Msg("OPML import completed")
}
func (i *Importer) cleanShutdown(ctx context.Context) {
i.log.Info().Msgf("shutting down")
os.Exit(i.exitCode)
}

View File

@@ -0,0 +1,87 @@
package store
import (
"strings"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
)
type SiteTag struct {
ID uint `gorm:"primaryKey"`
SiteID string `gorm:"not null"`
Value string `gorm:"not null"`
db *gorm.DB `gorm:"-"`
}
func (SiteTag) TableName() string {
return "site_tags"
}
func (s *Site) AddTag(value string) error {
value = strings.ToLower(value)
if s.HasTag(value) {
log.Debug().Str("tag", value).Msg("tag already exists")
return nil
}
tag := &SiteTag{
SiteID: s.ID,
Value: value,
db: s.db,
}
result := s.db.Create(tag)
if result.Error != nil {
log.Error().Err(result.Error).Msg("unable to add site tag")
return result.Error
}
log.Debug().
Str("siteid", s.ID).
Str("tag", value).
Msg("site tag added")
return nil
}
func (s *Site) DeleteAttribute(value string) error {
value = strings.ToLower(value)
result := s.db.Where("user_id = ? AND value = ?", s.ID, value).Delete(&SiteTag{})
if result.Error != nil {
log.Error().Err(result.Error).Msg("unable to delete site tag")
return result.Error
}
log.Debug().
Str("siteid", s.ID).
Str("tag", value).
Msg("site tag deleted")
return nil
}
func (s *Site) HasTag(value string) bool {
value = strings.ToLower(value)
var count int64
result := s.db.Model(&SiteTag{}).Where("site_id = ? AND value = ?", s.ID, value).Count(&count)
if result.Error != nil {
log.Error().Err(result.Error).Msg("unable to check site tag existence")
return false
}
return count > 0
}
func (s *Site) GetTags() ([]string, error) {
var tags []SiteTag
result := s.db.Where("site_id = ?", s.ID).Find(&tags)
if result.Error != nil {
log.Error().Err(result.Error).Msg("unable to get site tags")
return nil, result.Error
}
var tagValues []string
for _, attr := range tags {
tagValues = append(tagValues, attr.Value)
}
return tagValues, nil
}

51
internal/store/website.go Normal file
View File

@@ -0,0 +1,51 @@
package store
import (
"crypto/rand"
"time"
"github.com/oklog/ulid/v2"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
)
type Site struct {
ID string `gorm:"primaryKey"`
Domain string `gorm:"unique"`
URL string
CreatedAt *time.Time
FirstSeen *time.Time
LastSeen *time.Time
Tags []SiteTag `gorm:"foreignKey:SiteID"`
db *gorm.DB `gorm:"-"`
}
func (Site) TableName() string {
return "sites"
}
func (s *Store) AddSite(url string) (*Site, error) {
ID := ulid.MustNew(ulid.Timestamp(time.Now()), rand.Reader).String()
createdAt := time.Now()
site := &Site{
ID: ID,
URL: url,
CreatedAt: &createdAt,
db: s.db,
}
result := s.db.Create(site)
if result.Error != nil {
log.Error().
Str("siteid", ID).
Err(result.Error).
Msg("unable to insert site into db")
return nil, result.Error
}
log.Debug().
Str("siteid", ID).
Str("url", url).
Msg("site added to db")
return site, nil
}