diff --git a/preferred_name_logging/pnc_compile.pl b/preferred_name_logging/pnc_compile.pl
new file mode 100644
index 0000000..abcbe41
--- /dev/null
+++ b/preferred_name_logging/pnc_compile.pl
@@ -0,0 +1,101 @@
+#!/usr/bin/env perl
+
+# Read Postgresql log and compile preferred name change report.
+
+# Invoke with -y or --yesterday to compile with yesterday's timestamps.
+# Useful if you run this script overnight starting after 12AM.
+
+# Author: Peter Bailie, RPI Research Computing
+# Date: April 29, 2022
+
+use strict;
+use warnings;
+use autodie;
+use v5.30.0;
+use POSIX qw(strftime);
+
+# CONFIG -- denotes full path and file
+my $PSQL_LOG = "/var/log/postgresql/postgresql-12-main.log";
+my $PNC_LOG = "preferred_name_change_report.log";
+
+# Main
+# Bail out unless the effective user id is root (0).
+print STDERR "Root required.\n" and exit 1 if ($> != 0);
+
+# Only log lines starting with this datestamp are considered. With -y or
+# --yesterday as the first argument the datestamp is moved back 86400 seconds.
+# NOTE(review): gmtime is used, so this presumably expects the Postgresql log
+# to be timestamped in UTC -- confirm against the server's log_timezone.
+my $epoch_offset = 0;
+$epoch_offset = -86400 if (scalar @ARGV > 0 && ($ARGV[0] eq "-y" || $ARGV[0] eq "--yesterday"));
+my $datestamp = strftime "%Y-%m-%d", gmtime(time + $epoch_offset);
+
+# autodie turns a failed open/close into a fatal error, so no "or die" is needed.
+# The report is opened in append mode; earlier entries are kept.
+open my $psql_fh, "<:encoding(UTF-8)", $PSQL_LOG;
+open my $pnc_fh, ">>:encoding(UTF-8)", $PNC_LOG;
+
+my ($timestamp, $userid, $auth) = ("", "", "");
+my ($oldfn, $newfn, $oldln, $newln, $line1, $line2);
+while (<$psql_fh>) {
+    # DETAIL line: time of change, user id, and the optional first/last name pairs.
+    # Postgresql normally writes two spaces after the "DETAIL:" tag,
+    # so one or more spaces are accepted there.
+    ($timestamp, $userid, $oldfn, $newfn, $oldln, $newln) = ($1, $2, $3, $4, $5, $6) if (/^${datestamp} (\d{2}:\d{2}:\d{2}\.\d{3} [A-Z]{3}).+DETAIL: +USER_ID: "(.+?)" (?:PREFERRED_FIRSTNAME OLD: "(.*?)" NEW: "(.*?)" )?(?:PREFERRED_LASTNAME OLD: "(.*?)" NEW: "(.*?)")?$/);
+    # AUTH line: who made the change, taken from a /* AUTH: "..." */ SQL comment.
+    $auth = $1 if (/\/\* AUTH: "(.+)" \*\//);
+    # $auth is always on a different line than $timestamp, $userid.
+    # But all three having data will indicate we have collected pref name change logs.
+    if ($timestamp ne "" && $userid ne "" && $auth ne "") {
+        # $oldfn, $newfn, $oldln, $newln -- some may be undefined.
+        # This happens when either the firstname or lastname change wasn't recorded in PSQL logs (because no change occurred).
+        # Undefined vars need to be defined to prevent 'concatenation by undefined var' warning.
+        foreach ($oldfn, $newfn, $oldln, $newln) {
+            $_ = "" if (!defined $_);
+        }
+
+        # If both old and new firstnames are blank, no change was logged.
+        if ($oldfn ne "" || $newfn ne "") {
+            ($oldfn, $newfn) = rpad(19, $oldfn, $newfn);
+            $line1 = " OLD PREF FIRSTNAME: ${oldfn}";
+            $line2 = " NEW PREF FIRSTNAME: ${newfn}";
+        } else {
+            # NOTE(review): 41 is presumably the width of a populated first-name
+            # column (label plus 19-char name) -- confirm it matches the labels above.
+            ($line1, $line2) = ("", "");
+            ($line1, $line2) = rpad(41, $line1, $line2);
+        }
+
+        # If both old and new lastnames are blank, no change was logged.
+        if ($oldln ne "" || $newln ne "") {
+            # Pad the old AND new last name (the new one was previously left unpadded).
+            ($oldln, $newln) = rpad(19, $oldln, $newln);
+            $line1 .= " OLD PREF LASTNAME: ${oldln}\n";
+            $line2 .= " NEW PREF LASTNAME: ${newln}\n";
+        } else {
+            $line1 .= "\n";
+            $line2 .= "\n";
+        }
+
+        ($userid) = rpad(9, $userid);
+        print $pnc_fh "${datestamp} ${timestamp} USER: ${userid} CHANGED BY: ${auth}\n";
+        print $pnc_fh $line1;
+        print $pnc_fh $line2;
+        # Reset the collected fields so the next record starts from a clean state.
+        ($timestamp, $userid, $auth) = ("", "", "");
+    }
+}
+
+close ($pnc_fh);
+close ($psql_fh);
+exit 0;
+
+# Right-pad string(s) with whitespaces.
+# expected parameters: (1) padding value, (2...n) strings to pad
+# return: list of padded strings
+# The arguments are not modified; padded copies are returned. Strings already
+# at least as long as the padding value are returned unchanged (no truncation).
+sub rpad {
+    my ($numpadding, @strings) = @_;
+    return map { sprintf("%-${numpadding}s", $_) } @strings;
+}
diff --git a/preferred_name_logging/preferred_name_logging.php b/preferred_name_logging/preferred_name_logging.php
deleted file mode 100755
index 07eeeb0..0000000
--- a/preferred_name_logging/preferred_name_logging.php
+++ /dev/null
@@ -1,364 +0,0 @@
-#!/usr/bin/env php
-
diff --git a/preferred_name_logging/readme.md b/preferred_name_logging/readme.md
index 14eb1b1..426a573 100644
--- a/preferred_name_logging/readme.md
+++ b/preferred_name_logging/readme.md
@@ -1,87 +1,25 @@
 # Preferred Name Logging
 
+## pnc_compile.pl
+
 This script will help track when user preferred names are changed. It attempts
-to log who was authenticated for the change, and what change occurred.
+to compile a report of who made a preferred name change, and what change had +occurred. + +Submitty provides the preferred name change information in the Postgresql log. +This script is used to parse the Postgresql log and create its own report +of preferred name changes that is much more human readable. -It works by first having Postgresql log the required information as the data is -updated in Submitty's databases. Then the sysadmin tool in this folder will -scrape the Postgresql logfile and record only those entries that showed -preferred name change. +This script will parse logs based on a specific datestamp. Since this script +acquires its datestamp based on GMT, it might work best when the postgresql logs +are timestamped in UTC. -This is setup and configured by Submitty during system installation and should -automatically operate daily at 2:05AM. +This is intended to be run on the postgresql server on at least a daily basis. +Invoke the script with `-y` or `--yesterday` to parse logs with yesterday's +datestamp. This is useful when running the script overnight after 12AM. ## FERPA Data processed and logged by this tool may be protected by [FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). Please consult and abide by your institute's data protection policies. - -## Logs - -Submitty's installation scripts will configure postgresql to write its logs to -`/var/local/submitty/logs/psql/`, rotated on a daily basis. - -This script will scrape the previous day's Postgresql log for any logged -changes to any user's preferred names. It will then create a daily log of -*preferred name changes* within `/var/local/submitty/logs/preferred_names/`. - -This script will also remove postgresql logs older than 2 days as postgresql's -own log rotation will not selectively remove outdated logs. - -## postgresql.conf - -Postgresql's original configuration is first copied to `postgresql.conf.backup`. 
-The following changes will be applied: -``` -log_destination = 'csvlog' -logging_collector = on -log_directory = '/var/log/postgresql' -log_filename = 'postgresql_%Y-%m-%dT%H%M%S.log' -log_file_mode = 0640 -log_rotation_age = 1d -log_rotation_size = 0 -log_min_messages = warning -log_min_duration_statement = -1 -log_statement = 'ddl' -log_error_verbosity = default -``` - -## preferred_names.json - -A sysadmin may optionally create a json file to configure a couple of options -for preferred name logging. If this json is not created, the script will -assume default settings, instead. - -To set these options, first create an empty text file in -`usr/local/submitty/config/preferred_names.json` - -Next, following the json format, you may set the following options (anything -else in the file will be ignored). - -* `log_emails` - This is either a singular email address or a list of email addresses. The - script will send error messages to the email address(es) listed. - - If this is a list, key values are ignored. But you could set key values to - document who owns a particular email address. - - Set to `null` to turn this off. Default setting is `null`. - -* `log_file_retention` - A whole number representing how many days of preferred name change logs to - keep. *This does not affect postgresql's logs.* Default setting is 7. - -## Example json: -```json -{ - "log_emails": - { - "Ada_Lovelace": "alovelace@submitty.com", - "Charles_Babbage": "cbabbage@submitty.com", - "Sysadmin_Mailing_List": "sysadmins@lists.submitty.com" - }, - - "log_file_retention": 30 -} -```