#!/usr/bin/env bash

# generate-instances-json.sh
#
# Generate a JSON of Libreddit instances, given a CSV input listing those
# instances.
#
# Information on script options is available by running
# generate-instances-json.sh -h
#
# For more information on how to use this script, see README.md.
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

set -o pipefail

# Grab today's date.
TODAY="$(date -I -u)"

# List of programs on which this script depends.
# curl is required in order to make HTTP requests.
# jq is required for JSON processing.
DEPENDENCIES=(curl jq)

# If USER_AGENT is specified in the envs, we'll pass this argument to curl
# using the -A flag to set a custom User-Agent.
USER_AGENT="${USER_AGENT:-}"
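#
# Example (illustrative value and file name; any string curl accepts with -A works):
#   USER_AGENT="my-instance-checker/1.0" ./generate-instances-json.sh -i instances.csv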

# check_tor
#
# Returns true if tor is running; false otherwise.
check_tor ()
{
    pidof -q tor
}

# check_program
#
# Returns true if the specified program is in PATH; false otherwise.
check_program ()
{
    command -v "${1}" >/dev/null
}

# can_tor
#
# Returns true if tor is running and torsocks is installed.
can_tor ()
{
    check_tor && check_program torsocks
}

# check_dependencies
#
# Returns false if a script dependency is missing. If this is the case, each
# missing dependency will be printed to stdout.
check_dependencies ()
{
    local -i rc=0

    for dep in "${DEPENDENCIES[@]}"
    do
        if ! check_program "${dep}"
        then
            rc=1
            echo "${dep}"
        fi
    done

    return "${rc}"
}
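
# Illustrative example: if curl were not installed, check_dependencies would
# print "curl" on a line of its own and return 1; main() below slurps that
# output into its missing_deps array.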

# read_csv_row [-d DELIMITER] [-v] ROW
#
# Reads a row of comma-separated values. Each value is printed as a separate
# line to stdout. The function prints nothing and returns 1 if the row is
# malformed, or if no ROW argument was passed to the function.
#
# The default delimiter is ','. Option -d can change this delimiter to a
# different character.
#
# Option -v will print "$i: " before each value, where $i starts at 1 and
# represents the value's position in the row.
#
# It is assumed that the total input is a row, which may include \n (if it's
# in, say, a quoted value).
read_csv_row ()
{
    local opt=
    local OPTIND
    local OPTARG

    local -i i=0
    local -i quote=0
    local -i esc=0
    local -i seen_delim=0
    local row=
    local print_col=n
    local len=
    local char=
    local value=
    local -a values=()
    local delim=,

    while getopts "d:v" opt
    do
        case "${opt}" in
            d) delim="${OPTARG}" ;;
            v) print_col="y" ;;
            *) ;;
        esac
    done
    shift "$((OPTIND-1))"

    # Get row from arg.
    row="${1}"
    if [[ -z "${1}" ]]
    then
        return 1
    fi

    # Process row character by character.
    len="${#row}"

    value=
    for (( i = 0; i < len; i++ ))
    do
        char="${row:${i}:1}"

        # "Handle" escapes. Really, it just means writing the escape verbatim
        # into the string. Yes, that includes ". Because this is ultimately
        # going into JSON, and making this a fully-featured CSV reader would
        # be beyond the scope of what this script is intended for.
        if [[ ${esc} -eq 1 ]]
        then
            esc=0
            value+="\\${char}"

            # Escape handled. Move on to next character.
            continue
        fi

        # \ triggers escape.
        # shellcheck disable=SC1003
        if [[ "${char}" == '\' ]]
        then
            esc=1
            continue
        fi

        # A delimiter means the end of the value (assuming we're not in a
        # quote).
        if [[ ${quote} -eq 0 && "${char}" == "${delim}" ]]
        then
            IFS=$'\n' values+=("${value}")
            value=
            seen_delim=1
            continue
        fi

        # " means the value is quoted, assuming we're not in the middle of an
        # escape.
        if [[ ${esc} -eq 0 && "${char}" == '"' ]]
        then
            quote=$(( (quote + 1) % 2 ))

            # We don't actually want to include the double quote in the value.
            continue
        fi

        # This character isn't a delimiter, so switch off seen_delim.
        seen_delim=0

        value+="${char}"
    done

    # Handle unexpected end of row.
    if [[ ${quote} -eq 1 || ${esc} -eq 1 ]]
    then
        return 1
    fi

    # Add the final value to the list of values.
    if [[ (${seen_delim} -eq 0 && -n "${value}") || (${seen_delim} -eq 1 && -z "${value}") ]]
    then
        values+=("${value}")
    fi

    # Print each value in a separate line.
    i=1
    for value in "${values[@]}"
    do
        if [[ "${print_col}" == "y" ]]
        then
            echo -n "${i}: "
            (( i++ ))
        fi
        echo "${value}"
    done
}
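
# Illustrative example of the parser above:
#   read_csv_row -v 'example.com,US,true,"A, quoted description"'
# would print one value per line, with -v adding the position prefixes:
#   1: example.com
#   2: US
#   3: true
#   4: A, quoted description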

# canonicalize_url URL
#
# Performs the following transformations of the given URL:
# -- Converts the string to all-lowercase.
# -- Removes any trailing slashes, but only if the path is /.
#
# Returns 1 if no or a blank URL is provided, or 2 if the string is not a
# valid URL.
#
# TODO: Internationalized domain name support. For now, provide the URL in
# Punycode if needed.
canonicalize_url ()
{
    local url=

    if [[ -z "${1}" ]]
    then
        return 1
    fi
    url="${1}"

    # Convert URL to lowercase.
    url="${url,,}"

    # Reject the string if it's not a valid URL.
    if [[ ! "${url}" =~ ^[a-z0-9]+://[a-z0-9\.\-]+/? ]]
    then
        return 2
    fi

    # Strip trailing slashes, but only if the path is /.
    if [[ "${url#*://*/}" =~ ^/*$ ]]
    then
        while [[ "${url: -1:1}" == "/" ]]
        do
            url="${url:0: -1}"
        done
    fi

    echo "${url}"
}
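
# Illustrative examples (hypothetical URLs):
#   canonicalize_url "HTTPS://Example.COM/"      -> https://example.com
#   canonicalize_url "https://example.com/path/" -> https://example.com/path/
# The trailing slash is only stripped when the path is just "/".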

# get [-T] URL
#
# Makes an HTTP(S) GET request to the provided URL with curl. The response is
# written to standard out. get will determine if the URL is an onion site, and,
# if so, it wraps the curl call with torsocks. The return value is the curl
# return value, or:
# 100: no or blank URL provided
# 101: invalid URL
# 102: URL is an onion site, but we can't connect to tor
# 103: non-tor URL has non-https scheme
# 104: prevented from dialing onion site
#
# Option -T will cause get to skip an onion site, silently, and 104 will be
# returned.
get ()
{
    local opt=
    local OPTIND
    local OPTARG

    local no_tor=n
    local url=
    local url_no_scheme=
    local scheme=
    local zone=
    local -i rc=0
    local -i tries=3
    local -i timeout=30
    local -a curl_cmd=(curl)

    while getopts "T" opt
    do
        case "${opt}" in
            T) no_tor=y ;;
            *) ;;
        esac
    done
    shift $((OPTIND-1))

    if [[ -z "${1}" ]]
    then
        return 100
    fi
    url="${1}"

    # Get the canonical URL.
    url="$(canonicalize_url "${url}")"
    if [[ -z "${url}" ]]
    then
        return 101
    fi
    url_no_scheme="${url#*://}"

    # Extract the scheme. We only support HTTP or HTTPS. But maybe Libreddit
    # has a future on gopher...
    #
    # TODO: support i2p
    local scheme="${url%%://*}"
    case "${scheme}" in
        http|https) ;;
        *) return 101 ;;
    esac

    # Extract the zone.
    zone="$(<<<"${url}" sed -nE 's|^.+://.+\.([^\./]+)/?.*|\1|p')"

    # Special handling for onion sites.
    # - Don't bother if tor isn't running or we don't have torsocks. But if
    #   both are available, make sure we wrap curl with torsocks.
    # - Onion sites can be either HTTPS or HTTP. But we want to enforce
    #   HTTPS on clearnet sites.
    # - Increase curl max-time to 60 seconds.
    if [[ "${zone}" == "onion" ]]
    then
        if [[ "${no_tor}" == "y" ]]
        then
            return 104
        fi

        if ! can_tor
        then
            return 102
        fi

        timeout=60
        curl_cmd=(torsocks curl)
    elif [[ "${scheme}" != "https" ]]
    then
        return 103
    fi

    # Use a custom User-Agent if provided.
    if [[ -n "${USER_AGENT?}" ]]
    then
        curl_cmd=("${curl_cmd[@]}" -A "${USER_AGENT}")
    fi

    # Do the GET. Try up to the number of times specified in the tries variable.
    for (( i = tries; i > 0; i-- ))
    do
        "${curl_cmd[@]}" -m"${timeout}" -fsL -- "${scheme}://${url_no_scheme}"
        rc=$?

        if [[ ${rc} -eq 0 ]]
        then
            return
        fi
    done

    return ${rc}
}
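
# Illustrative examples of the return codes above (hypothetical URLs):
#   get "http://example.com"           -> 103 (clearnet URL is not HTTPS)
#   get -T "http://example1234.onion"  -> 104 (onion site skipped because of -T)
#   get "https://example.com"          -> curl's own exit status (0 on success)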

# create_instance_entry [-T] URL COUNTRY_CODE [CLOUDFLARE [DESCRIPTION]]
#
# Create JSON object for instance. To specify that the instance is behind
# Cloudflare, simply set the third argument to be true; any other value
# will be interpreted as false.
#
# A description can be specified in the fourth argument (which means that, if
# you want to specify a description for a website for which Cloudflare is
# _disabled_, set the third argument to ""). If you pass a description in,
# all quotes will need to be escaped, as this will go directly into a
# JSON string value. (The idea is that read_csv_row will do the appropriate
# processing of the rows, including escaping characters in the description
# column, and we will then pass those values verbatim into this function.)
#
# Option -T will cause get to skip an onion site, silently, and 100 will be
# returned.
create_instance_entry ()
{
    local cloudflare=n
    local res=
    local version=
    local json=
    local url_type="url"
    local -i rc=0
    local -a get_opts=()

    local opt=
    local OPTIND
    local OPTARG

    while getopts "T" opt
    do
        case "${opt}" in
            T) get_opts+=("-T") ;;
            *) ;;
        esac
    done
    shift $((OPTIND-1))

    local url="${1}"
    local country="${2}"
    local description="${4}"

    if [[ -z "${url}" || -z "${country}" ]]
    then
        return 1
    fi

    if [[ "${3}" == "true" ]]
    then
        cloudflare=y
    fi

    res="$(get "${get_opts[@]}" "${url}")"
    rc=$?

    if [[ ${rc} -ne 0 ]]
    then
        # 104 is returned if we prevented get from connecting to an onion site.
        # That requires us to return the special code 100.
        if [[ ${rc} -eq 104 ]]
        then
            return 100
        fi

        return 2
    fi

    if [[ -z "${res}" ]]
    then
        return 3
    fi

    # There's no good way to get the version apart from a scrape. This might
    # not work in early versions of Libreddit, or into the future.
    # TODO: previous capture group was ([^\<]+), but I changed this to
    # (v([0-9]+\.){2}[0-9]+) under the assumption the version is always a semantic
    # version; but this may not be true.
    version="$(<<<"${res}" sed -nE 's/.*<span\s+id="version">(v([0-9]+\.){2}[0-9]+).*$/\1/p')"
    if [[ -z "${version}" ]]
    then
        return 4
    fi

    # Find out if this is an onion website.
    # Yeah, this is a little lazy and we could do this a bit better.
    if [[ "${url,,}" =~ ^https?://[^/]+\.onion ]]
    then
        url_type="onion"
    fi

    # Build JSON.
    json="{"
    json+="$(printf '"%s":"%s"' "${url_type}" "${url}")"
    json+=","
    json+="$(printf '"country":"%s"' "${country}")"
    json+=","
    json+="$(printf '"version":"%s"' "${version}")"

    if [[ "${cloudflare}" == "y" ]]
    then
        json+=","
        json+="\"cloudflare\":true"
    fi

    if [[ -n "${description}" ]]
    then
        # DANGER: If the description string isn't properly escaped, the JSON will be
        # malformed!
        json+=","
        json+="$(printf '"description":"%s"' "${description}")"
    fi
    json+="}"

    echo "${json}"
}
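
# Illustrative example: for a reachable instance, the function prints a single
# JSON object on one line, roughly (field values here are hypothetical):
#   {"url":"https://example.com","country":"US","version":"v0.30.1","cloudflare":true,"description":"Example instance"}
# Onion instances use the key "onion" instead of "url"; "cloudflare" and
# "description" are only emitted when set.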

# NOTES
#
# use jq --slurp to turn multiple objects into array
#
# load any existing onion sites from json:
# jq -Mcer '.instances[] | select(.onion)' instances-example.json

# helpdoc
#
# Print usage information to stdout.
helpdoc ()
{
    cat <<!
USAGE
    ${BASH_SOURCE[0]} [-I INPUT_JSON] [-T] [-f] [-i INPUT_CSV] [-o OUTPUT_JSON]
    ${BASH_SOURCE[0]} -h

DESCRIPTION
    Generate a JSON of Libreddit instances, given a CSV input listing those
    instances.

    The INPUT_CSV file must be a file in CSV syntax of the form

        [url],[country code],[cloudflare enabled],[description]

    where all four parameters are required (though the description may be
    blank). Except for onion sites, all URLs MUST be HTTPS.

    OUTPUT_JSON will be overwritten if it exists. No confirmation will be
    requested from the user.

    By default, this script will attempt to connect to instances in the CSV
    that are on Tor, provided that it can (it will check to see if Tor is
    running and the availability of the torsocks program). If you want to
    disable connections to these onion sites, provide the -T option.

OPTIONS
    -I INPUT_JSON
        Import the list of Libreddit onion instances from the file INPUT_JSON.
        To use stdin, provide \`-I -\`. Implies -T. Note that the argument
        provided to this option CANNOT be the same as the argument provided to
        -i. If the JSON could not be read, the script will exit with status
        code 1.

    -T
        Do not connect to Tor. Onion sites in INPUT_CSV will not be processed.
        Assuming no other failure, the script will still exit with status code
        0.

    -f
        Force the script to exit, with status code 1, upon the first failure to
        connect to an instance. Normally, the script will continue to build and
        output the JSON even when one or more of the instances could not be
        reached, though the exit code will be non-zero.

    -i INPUT_CSV
        Use INPUT_CSV as the input file. To read from stdin (the default
        behavior), either omit this option or provide \`-i -\`. Note that the
        argument provided to this option CANNOT be the same as the argument
        provided to -I.

    -o OUTPUT_JSON
        Write the results to OUTPUT_JSON. Any existing file will be
        overwritten. To write to stdout (the default behavior), either omit
        this option or provide \`-o -\`.

ENVIRONMENT

    USER_AGENT
        Sets the User-Agent that curl will use when making the GET request to
        each website.
!
}
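
# Illustrative invocations (file names are placeholders):
#   ./generate-instances-json.sh -i instances.csv -o instances.json
#   ./generate-instances-json.sh -T -i instances.csv -o instances.json
#   ./generate-instances-json.sh -I instances.json -i instances.csv -o instances-new.json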

# main
#
# Main function.
main ()
{
    local opt=
    local OPTIND
    local OPTARG

    local failfast=n
    local do_tor=y
    local -a get_opts=()
    local -a missing_deps=()
    local import_onions_from_file=
    local input_file=/dev/stdin
    local output_file=/dev/stdout
    local -a instance_entries=()
    local -a imported_onions=()
    local instance_entry=
    local -i rc=0

    while getopts ":I:Tfhi:o:" opt
    do
        case "${opt}" in
            I) import_onions_from_file="${OPTARG}" ;;
            T) do_tor=n ;;
            f) failfast=y ;;
            h) helpdoc ; exit ;;
            i)
                input_file="${OPTARG}"
                if [[ -z "${input_file}" ]]
                then
                    echo >&2 "-i: Please specify a file."
                fi

                if [[ "${input_file}" == '-' ]]
                then
                    input_file=/dev/stdin
                fi
                ;;
            o)
                output_file="${OPTARG}"
                if [[ -z "${output_file}" ]]
                then
                    echo >&2 "-o: Please specify a file."
                fi

                if [[ "${output_file}" == '-' ]]
                then
                    output_file=/dev/stdout
                fi
                ;;
            \?)
                echo >&2 "-${OPTARG}: invalid option"
                helpdoc
                exit 255
                ;;
        esac
    done

    # Make sure we have necessary dependencies before moving forward.
    # shellcheck disable=SC2207
    IFS=$'\n' missing_deps=($(check_dependencies))

    if [[ ${#missing_deps[@]} -ne 0 ]]
    then
        {
            echo "Dependencies are missing. Please install them and then try running the script again."
            echo
            echo "Missing dependencies:"

            for dep in "${missing_deps[@]}"
            do
                echo -e "\t${dep}"
            done
        } >&2
        return 1
    fi

    # Special handling for -I.
    if [[ -n "${import_onions_from_file}" ]]
    then
        # Abort if -I and -i point to the same file.
        if [[ "${import_onions_from_file}" == "${input_file}" ]]
        then
            echo >&2 "-I and -i cannot point to the same file."
            echo >&2 "For more information, run: ${BASH_SOURCE[0]} -h"
            return 1
        fi

        # Set do_tor <- n so that we don't attempt to make tor connections.
        do_tor=n

        # Attempt to read in onion instances.
        # shellcheck disable=SC2207
        # (mapfile not ideal here since a pipe is required, inducing a
        # subshell, meaning nothing will actually get added to
        # imported_onions)
        IFS=$'\n' imported_onions=($(jq -Mcer '.instances[] | select(.onion)' "${import_onions_from_file}"))
        rc=$?

        if [[ ${rc} -ne 0 ]]
        then
            echo >&2 "Failed to read onion instances from existing JSON file."
            return 1
        fi
    fi

    # Check to see if we have tor. If we don't, then we will have to import
    # the existing tor instances from the JSON.
    # TODO: For I2P, we will likely have to do something similar.
    if [[ "${do_tor}" == "n" ]] || ! can_tor
    then
        if [[ "${do_tor}" == "y" ]]
        then
            echo >&2 "WARNING: Either the tor service is not running or torsocks is not available. Either way, onion sites will not be processed."
        fi
        do_tor="n"
        get_opts+=("-T")
    fi

    if [[ "${input_file}" != "/dev/stdin" ]]
    then
        if [[ ! -e "${input_file}" ]]
        then
            echo >&2 "${input_file}: No such file or directory"
            return 1
        fi

        if [[ -d "${input_file}" ]]
        then
            echo >&2 "${input_file}: Is a directory"
            return 1
        fi
    fi

    # Read in the CSV.
    local -a rows=()
    <"${input_file}" mapfile rows
    rc=$?

    if [[ ${rc} -ne 0 ]]
    then
        return ${rc}
    fi

    # Process the CSV, row by row.
    local -a values=()
    local -a failed=()
    local l=1
    local url=
    for row in "${rows[@]}"
    do
        # shellcheck disable=SC2207
        IFS=$'\n' values=($(read_csv_row "${row}"))
        rc=$?

        if [[ ${rc} -ne 0 || ${#values[@]} -lt 3 || ${#values[@]} -gt 4 ]]
        then
            echo >&2 "${l}: failed to parse row"
            echo >&2 "Script will now terminate."
            return 2
        fi

        # Print friendly message to log while processing row.
        url="${values[0]}"
        echo -n >&2 "${url}: "

        instance_entry="$(IFS=$'\n' create_instance_entry "${get_opts[@]}" "${values[@]}")"
        rc=$?

        if [[ ${rc} -eq 0 ]]
        then
            IFS=$'\n' instance_entries+=("${instance_entry}")
            echo "OK"
        elif [[ ${rc} -eq 100 ]]
        then
            # rc=100 means the onion site is skipped because we told
            # create_instance_entry to skip the onion site.
            echo "SKIPPED"
        else
            echo "FAILED"

            if [[ "${failfast}" == "y" ]]
            then
                return 1
            fi

            failed+=("${url}")
        fi >&2

        (( l++ ))
        rc=0
    done

    # Assemble everything into JSON.
    # TODO: see if this can be done in one jq call, without having
    # to pass the list to jq --slurp and then everything to jq.
    printf '{"updated":"%s","instances":%s}' "${TODAY}" "$(IFS=$'\n'
        for instance in "${instance_entries[@]}" "${imported_onions[@]}"
        do
            echo "${instance}"
        done | jq -Mcers .
    )" | jq -Mer . >"${output_file}"
    rc=$?

    if [[ ${rc} -ne 0 ]]
    then
        echo >&2 "There was a problem processing the JSON. The output file may be corrupted."
    fi

    if [[ ${#failed[@]} -gt 0 ]]
    then
        {
            echo "The following instances could not be reached:"
            for failed_url in "${failed[@]}"
            do
                echo -e "\t${failed_url}"
            done
        } >&2

        return 1
    fi

    return ${rc}
}
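
# The emitted JSON has this overall shape (values here are illustrative; the
# final jq call pretty-prints the real output):
#   {"updated":"2024-01-01","instances":[{"url":"https://example.com","country":"US","version":"v0.30.1"}]}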

if [[ "${BASH_SOURCE[0]}" == "${0}" ]]
then
    main "${@}"
    exit
fi