From db57ae4447905007bbdf0df2ac2e77ed32cec4ad Mon Sep 17 00:00:00 2001 From: sneak Date: Sat, 1 Jun 2024 16:43:00 -0700 Subject: [PATCH] added importer --- cmd/importer/main.go | 33 ++++++ go.mod | 16 ++- go.sum | 41 ++++++++ internal/importer/importer.go | 186 ++++++++++++++++++++++++++++++++++ internal/store/site_tag.go | 87 ++++++++++++++++ internal/store/website.go | 51 ++++++++++ 6 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 cmd/importer/main.go create mode 100644 internal/importer/importer.go create mode 100644 internal/store/site_tag.go create mode 100644 internal/store/website.go diff --git a/cmd/importer/main.go b/cmd/importer/main.go new file mode 100644 index 0000000..abf432d --- /dev/null +++ b/cmd/importer/main.go @@ -0,0 +1,33 @@ +package main + +import ( + "go.uber.org/fx" + "sneak.berlin/go/directory/internal/config" + "sneak.berlin/go/directory/internal/database" + "sneak.berlin/go/directory/internal/globals" + "sneak.berlin/go/directory/internal/importer" + "sneak.berlin/go/directory/internal/logger" +) + +var ( + Appname string = "importer" + Version string + Buildarch string +) + +func main() { + globals.Appname = Appname + globals.Version = Version + globals.Buildarch = Buildarch + + fx.New( + fx.Provide( + config.New, + database.New, + globals.New, + logger.New, + importer.New, + ), + fx.Invoke(func(*importer.Importer) {}), + ).Run() +} diff --git a/go.mod b/go.mod index 1fe7616..4c42630 100644 --- a/go.mod +++ b/go.mod @@ -21,32 +21,44 @@ require ( require ( github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53 // indirect github.com/CloudyKit/jet/v6 v6.2.0 // indirect + github.com/PuerkitoBio/goquery v1.8.0 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/hako/durafmt v0.0.0-20210608085754-5c1018a4e16b // indirect github.com/hashicorp/hcl v1.0.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect github.com/jackc/pgx/v5 v5.4.3 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-sqlite3 v1.14.17 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/mmcdole/gofeed v1.3.0 // indirect + github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oklog/ulid/v2 v2.1.0 // indirect github.com/pelletier/go-toml/v2 v2.2.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.13.0 // indirect + github.com/rivo/uniseg v0.4.7 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect + github.com/schollz/progressbar/v3 v3.14.3 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect + github.com/spf13/cobra v1.8.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.uber.org/dig v1.17.1 // indirect @@ -54,7 +66,9 @@ require ( go.uber.org/zap v1.26.0 // indirect golang.org/x/crypto v0.22.0 // indirect golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f // indirect - golang.org/x/sys v0.19.0 // indirect + golang.org/x/net v0.24.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/term v0.20.0 // indirect golang.org/x/text v0.14.0 // indirect google.golang.org/protobuf v1.33.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/go.sum b/go.sum index e0af41c..755b12d 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,10 @@ github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53 h1:sR+/8Yb4s github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= github.com/CloudyKit/jet/v6 v6.2.0 h1:EpcZ6SR9n28BUGtNJSvlBqf90IpjeFr36Tizxhn/oME= github.com/CloudyKit/jet/v6 v6.2.0/go.mod h1:d3ypHeIRNo2+XyqnGA8s+aphtcVpjP5hPwP/Lzo7Ro4= +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -11,6 +15,7 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -30,12 +35,15 @@ github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3Bop github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4 h1:60gBOooTSmNtrqNaRvrDbi8VAne0REaek2agjnITKSw= github.com/hako/durafmt v0.0.0-20191009132224-3f39dc1ed9f4/go.mod h1:5Scbynm8dF1XAPwIwkGPqzkM/shndPm79Jd1003hTjE= github.com/hako/durafmt v0.0.0-20210608085754-5c1018a4e16b h1:wDUNC2eKiL35DbLvsDhiblTUXHxcOPwQSCzi7xpQUN4= github.com/hako/durafmt v0.0.0-20210608085754-5c1018a4e16b/go.mod h1:VzxiSdG6j1pi7rwGm/xYI5RbtpBgM8sARDXlvEvxlu0= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk= @@ -48,6 +56,9 @@ github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -62,8 +73,19 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM= github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4= +github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE= +github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk= +github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU= github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= @@ -84,15 +106,20 @@ github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+a github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGKX7o= github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= +github.com/schollz/progressbar/v3 v3.14.3 h1:oOuWW19ka12wxYU1XblR4n16wF/2Y1dBLMarMo6p4xU= +github.com/schollz/progressbar/v3 v3.14.3/go.mod h1:aT3UQ7yGm+2ZjeXPqsjTenwL3ddUiuZ0kfQ/2tHlyNI= github.com/slok/go-http-metrics v0.12.0 h1:mAb7hrX4gB4ItU6NkFoKYdBslafg3o60/HbGBRsKaG8= github.com/slok/go-http-metrics v0.12.0/go.mod h1:Ee/mdT9BYvGrlGzlClkK05pP2hRHmVbRF9dtUVS8LNA= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= @@ -101,6 +128,8 @@ github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ= @@ -133,13 +162,25 @@ golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f h1:99ci1mjWVBWwJiEKYY6jWa4d2nTQVIEhZIptnrVb1XY= golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= +golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/internal/importer/importer.go b/internal/importer/importer.go new file mode 100644 index 0000000..04cd626 --- /dev/null +++ b/internal/importer/importer.go @@ -0,0 +1,186 @@ +package importer + +import ( + "context" + "encoding/json" + "io/ioutil" + "os" + "os/signal" + "syscall" + "time" + + "github.com/mmcdole/gofeed" + "github.com/rs/zerolog" + "github.com/schollz/progressbar/v3" + "github.com/spf13/cobra" + "go.uber.org/fx" + "sneak.berlin/go/directory/internal/config" + "sneak.berlin/go/directory/internal/globals" + "sneak.berlin/go/directory/internal/logger" + "sneak.berlin/go/directory/internal/store" + + _ "github.com/joho/godotenv/autoload" +) + +type ImporterParams struct { + fx.In + Logger *logger.Logger + Globals *globals.Globals + Config *config.Config + Store *store.Store +} + +type Importer struct { + startupTime time.Time + exitCode int + log *zerolog.Logger + params ImporterParams + ctx context.Context + cancelFunc context.CancelFunc +} + +func New(lc fx.Lifecycle, params ImporterParams) (*Importer, error) { + i := new(Importer) + i.params = params + i.log = params.Logger.Get() + + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + i.startupTime = time.Now() + go i.Run(ctx) + return nil + }, + OnStop: func(ctx context.Context) error { + i.cleanShutdown(ctx) + return nil + }, + }) + return i, nil +} + +func (i *Importer) Run(ctx context.Context) { + i.ctx, i.cancelFunc = context.WithCancel(ctx) + + rootCmd := &cobra.Command{ + Use: "importer", + Short: "Importer is a CLI for importing data into the directory", + } + + importCmd := &cobra.Command{ + Use: "import", + Short: "Import data into the directory", + } + + importJSONCmd := &cobra.Command{ + Use: "json [file]", + Short: "Import data from a JSON file", + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + i.importFromJSON(args[0]) + }, + } + + importOPMLCmd := &cobra.Command{ + Use: "opml [file]", + Short: "Import data from an OPML file", + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + i.importFromOPML(args[0]) + }, + } + + importCmd.AddCommand(importJSONCmd, importOPMLCmd) + rootCmd.AddCommand(importCmd) + + go func() { + c := make(chan os.Signal, 1) + signal.Ignore(syscall.SIGPIPE) + signal.Notify(c, os.Interrupt, syscall.SIGTERM) + sig := <-c + i.log.Info().Msgf("signal received: %+v", sig) + if i.cancelFunc != nil { + i.cancelFunc() + } + }() + + if err := rootCmd.ExecuteContext(i.ctx); err != nil { + i.log.Error().Err(err).Msg("command execution failed") + i.exitCode = 1 + } + + <-i.ctx.Done() + i.exitCode = 0 + i.cleanShutdown(ctx) +} + +func (i *Importer) importFromJSON(file string) { + i.log.Info().Msgf("importing from JSON file: %s", file) + data, err := ioutil.ReadFile(file) + if err != nil { + i.log.Error().Err(err).Msg("failed to read JSON file") + return + } + + var records []map[string]interface{} + if err := json.Unmarshal(data, &records); err != nil { + i.log.Error().Err(err).Msg("failed to unmarshal JSON") + return + } + + totalRecords := len(records) + bar := progressbar.NewOptions(totalRecords, + progressbar.OptionSetDescription("Importing records"), + progressbar.OptionShowCount(), + progressbar.OptionShowIts(), + progressbar.OptionShowElapsedTime(), + progressbar.OptionShowRemainingTime(), + progressbar.OptionSetPredictTime(true), + ) + + for _, record := range records { + // Insert record into the database + // db.InsertRecord(record) // Replace with actual database insertion logic + bar.Add(1) + } + + i.log.Info().Msg("JSON import completed") +} + +func (i *Importer) importFromOPML(file string) { + i.log.Info().Msgf("importing from OPML file: %s", file) + data, err := ioutil.ReadFile(file) + if err != nil { + i.log.Error().Err(err).Msg("failed to read OPML file") + return + } + + fp := gofeed.NewParser() + feed, err := fp.ParseString(string(data)) + if err != nil { + i.log.Error().Err(err).Msg("failed to parse OPML") + return + } + + totalOutlines := len(feed.Items) + bar := progressbar.NewOptions(totalOutlines, + progressbar.OptionSetDescription("Importing outlines"), + progressbar.OptionShowCount(), + progressbar.OptionShowIts(), + progressbar.OptionShowElapsedTime(), + progressbar.OptionShowRemainingTime(), + progressbar.OptionSetPredictTime(true), + ) + + for _, outline := range feed.Items { + // Insert outline into the database + // db.InsertOutline(outline) // Replace with actual database insertion logic + bar.Add(1) + } + + i.log.Info().Msg("OPML import completed") +} + +func (i *Importer) cleanShutdown(ctx context.Context) { + i.log.Info().Msgf("shutting down") + os.Exit(i.exitCode) +} diff --git a/internal/store/site_tag.go b/internal/store/site_tag.go new file mode 100644 index 0000000..55a55d0 --- /dev/null +++ b/internal/store/site_tag.go @@ -0,0 +1,87 @@ +package store + +import ( + "strings" + + "github.com/rs/zerolog/log" + "gorm.io/gorm" +) + +type SiteTag struct { + ID uint `gorm:"primaryKey"` + SiteID string `gorm:"not null"` + Value string `gorm:"not null"` + db *gorm.DB `gorm:"-"` +} + +func (SiteTag) TableName() string { + return "site_tags" +} + +func (s *Site) AddTag(value string) error { + value = strings.ToLower(value) + if s.HasTag(value) { + log.Debug().Str("tag", value).Msg("tag already exists") + return nil + } + + tag := &SiteTag{ + SiteID: s.ID, + Value: value, + db: s.db, + } + + result := s.db.Create(tag) + if result.Error != nil { + log.Error().Err(result.Error).Msg("unable to add site tag") + return result.Error + } + + log.Debug(). + Str("siteid", s.ID). + Str("tag", value). + Msg("site tag added") + return nil +} + +func (s *Site) DeleteAttribute(value string) error { + value = strings.ToLower(value) + result := s.db.Where("user_id = ? AND value = ?", s.ID, value).Delete(&SiteTag{}) + if result.Error != nil { + log.Error().Err(result.Error).Msg("unable to delete site tag") + return result.Error + } + + log.Debug(). + Str("siteid", s.ID). + Str("tag", value). + Msg("site tag deleted") + return nil +} + +func (s *Site) HasTag(value string) bool { + value = strings.ToLower(value) + var count int64 + result := s.db.Model(&SiteTag{}).Where("site_id = ? AND value = ?", s.ID, value).Count(&count) + if result.Error != nil { + log.Error().Err(result.Error).Msg("unable to check site tag existence") + return false + } + return count > 0 +} + +func (s *Site) GetTags() ([]string, error) { + var tags []SiteTag + result := s.db.Where("site_id = ?", s.ID).Find(&tags) + if result.Error != nil { + log.Error().Err(result.Error).Msg("unable to get site tags") + return nil, result.Error + } + + var tagValues []string + for _, attr := range tags { + tagValues = append(tagValues, attr.Value) + } + + return tagValues, nil +} diff --git a/internal/store/website.go b/internal/store/website.go new file mode 100644 index 0000000..36f267b --- /dev/null +++ b/internal/store/website.go @@ -0,0 +1,51 @@ +package store + +import ( + "crypto/rand" + "time" + + "github.com/oklog/ulid/v2" + "github.com/rs/zerolog/log" + "gorm.io/gorm" +) + +type Site struct { + ID string `gorm:"primaryKey"` + Domain string `gorm:"unique"` + URL string + CreatedAt *time.Time + FirstSeen *time.Time + LastSeen *time.Time + Tags []SiteTag `gorm:"foreignKey:SiteID"` + db *gorm.DB `gorm:"-"` +} + +func (Site) TableName() string { + return "sites" +} + +func (s *Store) AddSite(url string) (*Site, error) { + ID := ulid.MustNew(ulid.Timestamp(time.Now()), rand.Reader).String() + createdAt := time.Now() + site := &Site{ + ID: ID, + URL: url, + CreatedAt: &createdAt, + db: s.db, + } + + result := s.db.Create(site) + if result.Error != nil { + log.Error(). + Str("siteid", ID). + Err(result.Error). + Msg("unable to insert site into db") + return nil, result.Error + } + + log.Debug(). + Str("siteid", ID). + Str("url", url). + Msg("site added to db") + return site, nil +}