diff --git a/web/packages/media/file.ts b/web/packages/media/file.ts index 383be70d21..f90ffab08a 100644 --- a/web/packages/media/file.ts +++ b/web/packages/media/file.ts @@ -7,7 +7,7 @@ export const hasFileHash = (file: Metadata) => * [Note: Photos are always in local date/time] * * Photos out in the wild frequently do not have associated timezone offsets for - * the datetime embedded in their metadata. This is a artifact of an era where + * the date/time embedded in their metadata. This is an artifact of an era where * cameras often didn't even know their date/time correctly, let alone the * UTC offset of their local date/time. * @@ -24,14 +24,14 @@ export const hasFileHash = (file: Metadata) => * show up as midnight, not as (e.g.) 19:30 IST. This is fact #2. * * Combine these two facts, and if you ponder a bit, you'll find that there is - * only one way for a photos app to show / sort / label (as a day) – by using - * the local datetime without the attached UTC offset **even if it is present**. + * only one way for a photos app to show / sort / label the date – by using the + * local date/time without the attached UTC offset, **even if it is present**. * * The UTC offset is still useful though, and we don't want to lose that * information. The number of photos with a UTC offset will only increase. And * whenever it is present, it provides additional context for the user. * - * So we keep both the local date/time string (an ISO8601 string guaranteed to + * So we keep both the local date/time string (an ISO 8601 string guaranteed to * be without an associated UTC offset), and an (optional) UTC offset string. * * It is important to NOT think of the local date/time string as an instant of @@ -49,7 +49,7 @@ export interface ParsedMetadataDate { /** * A local date/time. * - * This is a partial ISO8601 datetime string guaranteed not to have a + * This is a partial ISO 8601 date/time string guaranteed not to have a * timezone offset. e.g.
/**
 * Parse a partial or full ISO 8601 string into a {@link ParsedMetadataDate}.
 *
 * @param s A partial or full ISO 8601 string. That is, it is a string of the
 * form "2023-08-23T18:03:00.000+05:30" or "2023-08-23T12:33:00.000Z" with all
 * components except the year potentially missing.
 *
 * @returns A {@link ParsedMetadataDate}, or `undefined` if {@link s} cannot be
 * parsed. This function does not throw for unparseable input.
 *
 * ---
 * Some examples:
 *
 * - "2022" => dateTime "2022-01-01T00:00:00.000", offsetTime undefined
 *
 * - "2023-08-23T18:03:00" => dateTime "2023-08-23T18:03:00.000", offsetTime
 *   undefined
 *
 * - "2023-08-23T18:03:00.000+05:30" => dateTime "2023-08-23T18:03:00.000",
 *   offsetTime "+05:30"
 *
 * - "2023-08-23T12:33:00.000Z" => dateTime "2023-08-23T12:33:00.000",
 *   offsetTime "Z"
 */
export const parseMetadataDate = (
    s: string,
): ParsedMetadataDate | undefined => {
    // Construct the timestamp using the original string itself. If s is
    // parseable as a date, then this will give us the correct UTC timestamp.
    // If the UTC offset is not present, then date-time forms are interpreted
    // in the local (current) time zone by `Date`.
    //
    // `getTime` returns epoch milliseconds; the multiplication scales the
    // value by a factor of 1000.
    const timestamp = new Date(s).getTime() * 1000;
    if (isNaN(timestamp)) {
        // s is not a well formed ISO 8601 date/time string.
        return undefined;
    }

    // Now we try to massage s into two parts - the local date/time string, and
    // a UTC offset string.

    let offsetTime: string | undefined;
    let sWithoutOffset = s;

    // Check to see if there is a time-zone descriptor of the form "Z" or
    // "±05:30" or "±0530" at the end of s.
    //
    // The alternatives are grouped so that the `$` anchor applies to both of
    // them; without the group a "Z" anywhere in s would match.
    const m = /(?:Z|[+-]\d\d:?\d\d)$/.exec(s);
    // A match at index 0 would leave no date/time part at all, so it is
    // (intentionally) treated the same as no match.
    if (m?.index) {
        sWithoutOffset = s.substring(0, m.index);
        offsetTime = s.substring(m.index);
    }

    // Convert sWithoutOffset - a potentially partial ISO 8601 string - to a
    // canonical ISO 8601 string.
    //
    // In its full generality, this is non-trivial. The approach we take is:
    //
    // 1. Rely on the browser to be able to parse a partial ISO 8601 string.
    //    This relies on non-standard behaviour but seemingly works in
    //    practice.
    //
    // 2. Get an ISO 8601 representation of it. This is standard.
    //
    // A thing to watch out for is that browsers treat date only and date time
    // strings differently when the offset is not present (as would be for us).
    //
    // > When the time zone offset is absent, date-only forms are interpreted as
    // > a UTC time and date-time forms are interpreted as local time.
    // >
    // > https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date#date_time_string_format
    //
    // For our purpose, we want to always interpret them as UTC time. This way,
    // when we later get back its string representation for step 2, we will get
    // back the same numerical values, and can just chop off the "Z".
    //
    // So if the length of the string is less than or equal to yyyy-mm-dd (10),
    // then we use it verbatim, otherwise we append a "Z".
    const date = new Date(
        sWithoutOffset + (sWithoutOffset.length <= 10 ? "" : "Z"),
    );
    if (isNaN(date.getTime())) {
        // s itself was parseable, but the massaged string is not (this can
        // happen for non ISO 8601 inputs that the runtime is lenient about).
        // `toISOString` throws a RangeError on an invalid date, so bail out
        // here to honour our "undefined if s cannot be parsed" contract.
        return undefined;
    }

    // The string returned by `toISOString` is guaranteed to be UTC and denoted
    // by the suffix "Z". If we chop that off, we get the canonical
    // representation we wish for: an otherwise well-formed ISO 8601 string but
    // without any time zone descriptor.
    const dateTime = dropLast(date.toISOString());

    return { dateTime, offsetTime, timestamp };
};

/**
 * Return {@link s} without its last character.
 *
 * An empty string is returned as it is.
 */
const dropLast = (s: string) => s.slice(0, -1);