From 70448c889a4ffd74d01fb1883995a7a9ef2201ca Mon Sep 17 00:00:00 2001 From: pbailie Date: Tue, 7 Feb 2023 19:11:32 -0500 Subject: [PATCH 1/2] Update pnc_compile.pl --- preferred_name_logging/pnc_compile.pl | 87 +++++++++++++++------------ 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/preferred_name_logging/pnc_compile.pl b/preferred_name_logging/pnc_compile.pl index abcbe41..4458d4c 100644 --- a/preferred_name_logging/pnc_compile.pl +++ b/preferred_name_logging/pnc_compile.pl @@ -5,8 +5,9 @@ # Invoke with -y or --yesterday to compile with yesterday's timestamps. # Useful if you run this script overnight starting after 12AM. -# Author: Peter Bailie, RPI Research Computing -# Date: April 29, 2022 +# Author: Peter Bailie, RPI Research Computing +# Date: April 29, 2022 +# Updated: February 7, 2023 use strict; use warnings; @@ -28,46 +29,58 @@ open my $psql_fh, "<:encoding(UTF-8)", $PSQL_LOG; open my $pnc_fh, ">>:encoding(UTF-8)", $PNC_LOG; -my ($timestamp, $userid, $auth) = ("", "", ""); -my ($oldfn, $newfn, $oldln, $newln, $line1, $line2); +# fn = firstname/givenname, ln = lastname/familyname +my ($timestamp, $userid, $auth, $oldfn, $newfn, $oldln, $newln, $line1, $line2); +my $regex_check = 1; while (<$psql_fh>) { - ($timestamp, $userid, $oldfn, $newfn, $oldln, $newln) = ($1, $2, $3, $4, $5, $6) if (/^${datestamp} (\d{2}:\d{2}:\d{2}\.\d{3} [A-Z]{3}).+DETAIL: USER_ID: "(.+?)" (?:PREFERRED_FIRSTNAME OLD: "(.*?)" NEW: "(.*?)" )?(?:PREFERRED_LASTNAME OLD: "(.*?)" NEW: "(.*?)")?$/); - $auth = $1 if (/\/\* AUTH: "(.+)" \*\//); - # $auth is always on a different line than $timestamp, $userid. - # But all three having data will indicate we have collected pref name change logs. - if ($timestamp ne "" && $userid ne "" && $auth ne "") { - # $oldfn, $newfn, $oldln, $oldfn -- some may be undefined. - # This happens when either the firstname or lastname change wasn't recorded in PSQL logs (because no change occured). 
- # Undefined vars need to be defined to prevent 'concatenation by undefned var' warning. - foreach ($oldfn, $newfn, $oldln, $newln) { - $_ = "" if (!defined $_); + if ($regex_check == 1) { + if ($_ =~ m/^${datestamp} (\d{2}:\d{2}:\d{2}\.\d{3} [A-Z]{3}).+LOG: PREFERRED_NAME DATA UPDATE$/) { + $timestamp = $1; + $regex_check = 2; } - - # If both old and new firstnames are blank, no change was logged. - if ($oldfn ne "" || $newfn ne "") { - ($oldfn, $newfn) = rpad(19, $oldfn, $newfn); - $line1 = " OLD PREF FIRSTNAME: ${oldfn}"; - $line2 = " NEW PREF FIRSTNAME: ${newfn}"; - } else { - ($line1, $line2) = ("", ""); - ($line1, $line2) = rpad(41, $line1, $line2); + } elsif ($regex_check == 2) { + if ($_ =~ m/DETAIL: USER_ID: "(.+?)" (?:PREFERRED_GIVENNAME OLD: "(.*?)" NEW: "(.*?)" )?(?:PREFERRED_FAMILYNAME OLD: "(.*?)" NEW: "(.*?)")?/) { + ($userid, $oldfn, $newfn, $oldln, $newln) = ($1, $2, $3, $4, $5); + $regex_check = 3; } + } elsif ($regex_check == 3) { + if ($_ =~ m/\/\* AUTH: "(.+)" \*\//) { + $auth = $1; - # If both old and new lastnames are blank, no change was logged. - if ($oldln ne "" || $newln ne "") { - ($oldln, $oldfn) = rpad(19, $oldln, $oldfn); - $line1 .= " OLD PREF LASTNAME: ${oldln}\n"; - $line2 .= " NEW PREF LASTNAME: ${newln}\n"; - } else { - $line1 .= "\n"; - $line2 .= "\n"; - } + # $oldfn, $newfn, $oldln, $oldfn -- some may be undefined. + # This happens when either the givenname or familyname change wasn't recorded in PSQL logs (because no change occured). + # Undefined vars need to be defined to prevent 'concatenation by undefned var' warning. + foreach ($oldfn, $newfn, $oldln, $newln) { + $_ = "" if (!defined $_); + } + + # If both old and new given names are blank, no change was logged. 
+ if ($oldfn ne "" || $newfn ne "") { + ($oldfn, $newfn) = rpad(19, $oldfn, $newfn); + $line1 = " OLD PREF GIVENNAME: ${oldfn}"; + $line2 = " NEW PREF GIVENNAME: ${newfn}"; + } else { + ($line1, $line2) = ("", ""); + ($line1, $line2) = rpad(41, $line1, $line2); + } - ($userid) = rpad(9, $userid); - print $pnc_fh "${datestamp} ${timestamp} USER: ${userid} CHANGED BY: ${auth}\n"; - print $pnc_fh $line1; - print $pnc_fh $line2; - ($timestamp, $userid, $auth) = ("", "", ""); + # If both old and new family names are blank, no change was logged. + if ($oldln ne "" || $newln ne "") { + ($oldln, $oldfn) = rpad(19, $oldln, $oldfn); + $line1 .= " OLD PREF FAMILYNAME: ${oldln}\n"; + $line2 .= " NEW PREF FAMILYNAME: ${newln}\n"; + } else { + $line1 .= "\n"; + $line2 .= "\n"; + } + + ($userid) = rpad(9, $userid); + print $pnc_fh "${datestamp} ${timestamp} USER: ${userid} CHANGED BY: ${auth}\n"; + print $pnc_fh $line1; + print $pnc_fh $line2; + + $regex_check = 1; + } } } From abe457782df6752fbe7042a9e5603ed06d6158c1 Mon Sep 17 00:00:00 2001 From: pbailie Date: Fri, 10 Feb 2023 11:38:35 -0500 Subject: [PATCH 2/2] Update readme.md --- preferred_name_logging/readme.md | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/preferred_name_logging/readme.md b/preferred_name_logging/readme.md index 426a573..a6b3ed3 100644 --- a/preferred_name_logging/readme.md +++ b/preferred_name_logging/readme.md @@ -1,24 +1,28 @@ -# Preferred Name Logging +# Preferred Name Change Logging -## pnc_compile.pl +In the interests of diversity, Submitty provides for users to set a preferred name should it be different from their legal name. This feature can be abused, so changes to a user's preferred name are recorded into Postgresql's log for review. To make it easier to locate these logged messages, a sysadmin tools script, `pnc_compile.pl`, is provided to fetch the preferred name change logs from Postgresql and compile them into a human-readable report. 
-This script will help track when user preferred names are changed. It attempts -to compile a report of who made a preferred name change, and what change had -occurred. +**IMPORTANT:** `pnc_compile.pl` needs to operate on a host that can directly access Postgresql's log. Typically, this means the script must be set up on the same server as Postgresql. -Submitty provides the preferred name change information in the Postgresql log. -This scipt is used to parse the Postgresql log and create its own report -of preferred name changes that is much more human readable. +1. Make sure your host has Perl 5.30.0 or later. + * Ubuntu 20.04 includes Perl 5.30.0. +2. Retrieve `pnc_compile.pl` from [GitHub](https://raw.githubusercontent.com/Submitty/SysadminTools/main/preferred_name_logging/pnc_compile.pl) (right-click the link and choose "Save Link As..."). +3. Edit the script file to set up its configuration. + * Locate the two lines shown below. They are near the top of the file. These lines dictate where to look for Postgresql's log and where to write the script's compiled log. + * `$PSQL_LOG` dictates where Postgresql's log is located. `$PNC_LOG` dictates where this script will record and append its report. + * The default for `$PSQL_LOG` is set for Postgresql 12 running in Ubuntu 20.04. The default for `$PNC_LOG` will write the script's report to the same directory as the script file. + * Change these values to match your host's setup. + ```perl + my $PSQL_LOG = "/var/log/postgresql/postgresql-12-main.log"; + my $PNC_LOG = "preferred_name_change_report.log"; + ``` +4. Set up a cron schedule to run the script. + * Postgresql's log is typically owned by `root`, so it is mandatory to run the script as `root`. + * Be sure to set execute permission on the script. + * The script will parse Postgresql's log *by the current day's datestamp*, so it is intended that the script is run once per day. 
+ * Alternatively, if you wish to schedule the crontab for overnight after 12AM, you can set the `-y` or `--yesterday` argument so the script will intentionally parse Postgresql's log by the *previous* day's datestamp. e.g. `/path/to/pnc_compile.pl -y` -This script will parse logs based on a specific datestamp. Since this script -acquires its datestamp based on GMT, it might work best when the postgresql logs -are timestamped in UTC. - -This is intended to be run on the postgresql server on at least a daily basis. -Invoke the script with `-y` or `--yesterday` to parse logs with yesterday's -datestamp. That is useful to run the script overnight after 12AM. - -## FERPA +# FERPA Data processed and logged by this tool may be protected by [FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html).