146 lines
3.7 KiB
Elixir
146 lines
3.7 KiB
Elixir
|
defmodule BdfrBrowser.Importer do
|
||
|
require Logger
|
||
|
|
||
|
use GenServer
|
||
|
|
||
|
alias BdfrBrowser.{Comment, Post, Repo, Subreddit}
|
||
|
|
||
|
@doc """
Starts the importer process and registers it under this module's name.

Only the empty list is accepted as argument; it is also what `init/1` receives.
"""
@spec start_link([]) :: GenServer.on_start()
def start_link([]) do
  server_options = [name: __MODULE__]
  GenServer.start_link(__MODULE__, [], server_options)
end
|
||
|
|
||
|
@doc """
Inserts one `Subreddit` row per folder found directly below the configured base directory.

Folders are visited in ascending, case-insensitive name order. A row whose `name` already
exists is left alone (`on_conflict: :nothing`). Returns the list of `Repo.insert/2` results,
one per folder.
"""
def subreddits do
  _ = Logger.info("Importing subreddits ...")

  insert_options = [on_conflict: :nothing, conflict_target: :name]

  Enum.map(list_folders(sort: :asc), fn folder_name ->
    Repo.insert(%Subreddit{name: folder_name}, insert_options)
  end)
end
|
||
|
|
||
|
@doc """
Imports every post, together with its comment tree, from all subreddit folders.

The directory layout walked is `<base directory>/<subreddit>/<date>/<post>.json`. For each
subreddit folder the matching `Subreddit` row is looked up by name and attached to the posts
imported from it.

Returns a flat list of `{post_record, comment_records}` tuples, where `comment_records` is the
flattened list of all comment records inserted for that post. Raises if a post or comment
cannot be inserted.
"""
def posts_and_comments do
  _ = Logger.info("Importing posts and comments ...")

  Enum.flat_map(list_folders(sort: :asc), fn subreddit ->
    _ = Logger.info("Importing entries from `#{subreddit}' ...")

    subreddit_record = Repo.get_by(Subreddit, name: subreddit)

    Enum.flat_map(list_folders(paths: [subreddit]), fn date ->
      _ = Logger.debug("Importing entries from `#{subreddit}' on `#{date}' ...")

      Enum.map(read_posts(paths: [subreddit, date], ext: ".json"), fn post ->
        _ = Logger.debug("Importing `#{post["id"]}' from `#{subreddit}' ...")

        {:ok, post_record} = import_post(post, subreddit_record)

        # Top-level comments have no parent; each call returns a nested list of records.
        comment_trees =
          Enum.map(post["comments"], fn comment -> import_comment(comment, post_record, nil) end)

        {post_record, List.flatten(comment_trees)}
      end)
    end)
  end)
end
|
||
|
|
||
|
@doc """
Asks the importer process to run a full import without waiting for it to finish.

Sends a `:background_import` cast to the process registered under this module's name.
"""
@spec background_import() :: :ok
def background_import, do: GenServer.cast(__MODULE__, :background_import)
|
||
|
|
||
|
# Callbacks
|
||
|
|
||
|
@impl true
# The server keeps no data between imports: it accepts only the empty argument list
# handed over by `start_link/1` and uses `nil` as its state.
def init([]), do: {:ok, nil}
|
||
|
|
||
|
@impl true
# Runs the complete import inside the server process and leaves the state unchanged.
def handle_cast(:background_import, server_state) do
  # Subreddits go first: `posts_and_comments/0` looks each subreddit row up by name.
  _ = subreddits()
  _ = posts_and_comments()

  {:noreply, server_state}
end
|
||
|
|
||
|
# Helpers
|
||
|
|
||
|
# Lists the entry names of a directory below the configured base directory.
#
# Options (keyword list):
#   * `:paths` - path segments appended to the base directory (default `[]`)
#   * `:ext`   - extension an entry must have, as reported by `Path.extname/1`
#                (default `""`, i.e. no extension)
#   * `:sort`  - `:asc` or `:desc`, applied to the lower-cased names (default `:desc`)
#
# Entries whose name starts with "." are always skipped. When no extension is requested the
# callers expect folders, so extension-less regular files (e.g. a stray `README`) are dropped
# too; otherwise they would be treated as subreddit/date folders and make a later
# `File.ls!/1` on them raise.
#
# Raises if `:base_directory` is not configured or the directory cannot be listed.
defp list_folders(args) do
  paths = Keyword.get(args, :paths, [])
  extname = Keyword.get(args, :ext, "")
  sort = Keyword.get(args, :sort, :desc)
  base_directory = Application.fetch_env!(:bdfr_browser, :base_directory)
  directory = Path.join([base_directory | paths])

  directory
  |> File.ls!()
  |> Enum.filter(fn name ->
    not String.starts_with?(name, ".") and Path.extname(name) == extname and
      (extname != "" or File.dir?(Path.join(directory, name)))
  end)
  |> Enum.sort_by(&String.downcase/1, sort)
end
|
||
|
|
||
|
# Reads and decodes every post file in the directory addressed by `:paths`.
#
# `args` is forwarded unchanged to `list_folders/1` to obtain the file names, so `:ext` and
# `:sort` apply there as well; `:paths` is mandatory here. Each file is decoded with
# `Jason.decode!/1` and the file name it came from is stored under the "filename" key.
# The decoded posts are finally ordered by their "created_utc" value using `:sort`
# (default `:desc`).
#
# Raises when `:paths` is missing, a file cannot be read, or its content is not valid JSON.
defp read_posts(args) do
  file_names = list_folders(args)
  order = Keyword.get(args, :sort, :desc)

  root = Application.fetch_env!(:bdfr_browser, :base_directory)
  directory = Path.join([root | Keyword.fetch!(args, :paths)])

  file_names
  |> Enum.map(fn file_name ->
    [directory, file_name]
    |> Path.join()
    |> File.read!()
    |> Jason.decode!()
    |> Map.put("filename", file_name)
  end)
  |> Enum.sort_by(fn decoded -> decoded["created_utc"] end, order)
end
|
||
|
|
||
|
# Inserts `post` (a decoded post map with string keys) as a `Post` belonging to `subreddit`.
#
# "created_utc" is truncated to whole seconds before being turned into a `DateTime`, and the
# stored filename has its ".json" extension stripped. On an `id` conflict the only column
# written is `id`, set to the same value again.
#
# Returns the result of `Repo.insert/2`.
defp import_post(post, subreddit) do
  post_id = post["id"]
  posted_at = post["created_utc"] |> trunc() |> DateTime.from_unix!()
  filename = Path.basename(post["filename"], ".json")

  new_post = %Post{
    id: post_id,
    title: post["title"],
    selftext: post["selftext"],
    url: post["url"],
    permalink: post["permalink"],
    author: post["author"],
    upvote_ratio: post["upvote_ratio"],
    posted_at: posted_at,
    filename: filename,
    subreddit: subreddit
  }

  Repo.insert(new_post, on_conflict: [set: [id: post_id]], conflict_target: :id)
end
|
||
|
|
||
|
# Inserts `comment` for `post` and then, recursively, every entry of its "replies" list.
#
# `parent` is the record of the comment being replied to, or `nil` for a top-level comment.
# The freshly inserted record becomes the parent of its direct replies.
#
# Returns a list whose head is the inserted comment record, followed by one (possibly nested)
# list per direct reply; the caller flattens it. Raises if the insert does not return
# `{:ok, record}`.
defp import_comment(comment, post, parent) do
  comment_id = comment["id"]
  posted_at = comment["created_utc"] |> trunc() |> DateTime.from_unix!()

  new_comment = %Comment{
    id: comment_id,
    author: comment["author"],
    body: comment["body"],
    score: comment["score"],
    posted_at: posted_at,
    post: post,
    parent: parent
  }

  {:ok, comment_record} =
    Repo.insert(new_comment, on_conflict: [set: [id: comment_id]], conflict_target: :id)

  reply_trees =
    Enum.map(comment["replies"], fn reply -> import_comment(reply, post, comment_record) end)

  [comment_record | reply_trees]
end
|
||
|
end
|