Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write Brief: Load unconfident words from backend and update caching #39975

Merged
merged 5 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: other

Write Brief: Load unconfident words from backend and update caching
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ import React, { useState, useEffect, useCallback } from 'react';
* Internal dependencies
*/
import features from './features';
import calculateFleschKincaid from './utils/FleschKincaidUtils';
import calculateFleschKincaid from './utils/flesch-kincaid-utils';
import { canWriteBriefFeatureBeEnabled } from './utils/get-availability';
import { getPostText } from './utils/getPostText';
import { getPostText } from './utils/get-post-text';
import './breve.scss';
/**
* Types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { __ } from '@wordpress/i18n';
/**
* Internal dependencies
*/
import { escapeRegExp } from '../../utils/escapeRegExp';
import { escapeRegExp } from '../../utils/escape-regexp';
import phrases from './phrases';
/**
* Types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { __ } from '@wordpress/i18n';
/**
* Internal dependencies
*/
import { escapeRegExp } from '../../utils/escapeRegExp';
import { escapeRegExp } from '../../utils/escape-regexp';
/**
* Types
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,12 @@ import nspell from 'nspell';
/**
* Internal dependencies
*/
import getDictionary from '../../utils/get-dictionary';
import getFeatureData from '../../utils/get-feature-data';
import a8c from './a8c';
/**
* Types
*/
import type {
BreveFeatureConfig,
SpellingDictionaryContext,
HighlightedText,
SpellChecker,
BreveDispatch,
} from '../../types';
import type { BreveFeatureConfig, HighlightedText, SpellChecker, BreveDispatch } from '../../types';

const debug = debugFactory( 'jetpack-ai-breve:spelling-mistakes' );

Expand All @@ -32,53 +26,6 @@ export const SPELLING_MISTAKES: BreveFeatureConfig = {
};

const spellCheckers: { [ key: string ]: SpellChecker } = {};
const contextRequests: {
[ key: string ]: { loading: boolean; loaded: boolean; failed: boolean };
} = {};

const fetchContext = async ( language: string ) => {
debug( 'Fetching spelling context from the server' );

const { setDictionaryLoading } = dispatch( 'jetpack/ai-breve' ) as BreveDispatch;

setDictionaryLoading( SPELLING_MISTAKES.name, true );

try {
contextRequests[ language ] = { loading: true, loaded: false, failed: false };
const data = await getDictionary( SPELLING_MISTAKES.name, language );

localStorage.setItem(
`jetpack-ai-breve-spelling-context-${ language }`,
JSON.stringify( data )
);

contextRequests[ language ] = { loading: false, loaded: true, failed: false };
debug( 'Loaded spelling context from the server' );
} catch ( error ) {
debug( 'Failed to fetch spelling context', error );
contextRequests[ language ] = { loading: false, loaded: false, failed: true };
// TODO: Handle retries
} finally {
setDictionaryLoading( SPELLING_MISTAKES.name, false );
}
};

const getContext = ( language: string ) => {
// First check if the context is already defined in local storage
const storedContext = localStorage.getItem( `jetpack-ai-breve-spelling-context-${ language }` );
let context: SpellingDictionaryContext | null = null;
const { loading, failed } = contextRequests[ language ] || {};

if ( storedContext ) {
context = JSON.parse( storedContext );
debug( 'Loaded spelling context from local storage' );
} else if ( ! loading && ! failed ) {
// If the context is not in local storage and we haven't failed to fetch it before, try to fetch it once
fetchContext( language );
}

return context;
};

export const getSpellChecker = ( { language = 'en' }: { language?: string } = {} ) => {
if ( spellCheckers[ language ] ) {
Expand All @@ -87,7 +34,7 @@ export const getSpellChecker = ( { language = 'en' }: { language?: string } = {}

// Cannot await here as the Rich Text function needs to be synchronous.
// Load of the dictionary in the background if necessary and re-trigger the highlights later.
const spellingContext = getContext( language );
const spellingContext = getFeatureData( { feature: SPELLING_MISTAKES.name, language } );

if ( ! spellingContext ) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import { __ } from '@wordpress/i18n';
/**
* Internal dependencies
*/
import { escapeRegExp } from '../../utils/escapeRegExp';
import words from './words';
import { escapeRegExp } from '../../utils/escape-regexp';
import getFeatureData from '../../utils/get-feature-data';
/**
* Types
*/
Expand All @@ -20,11 +20,11 @@ export const UNCONFIDENT_WORDS: BreveFeatureConfig = {
defaultEnabled: true,
};

const list = new RegExp( `\\b(${ words.map( escapeRegExp ).join( '|' ) })\\b`, 'gi' );

export default function unconfidentWords( blockText: string ): Array< HighlightedText > {
const matches = blockText.matchAll( list );
const highlightedTexts: Array< HighlightedText > = [];
const dictionary = getFeatureData( { feature: UNCONFIDENT_WORDS.name, language: 'en' } ) ?? [];
const list = new RegExp( `\\b(${ dictionary.map( escapeRegExp ).join( '|' ) })\\b`, 'gi' );
const matches = blockText.matchAll( list );

for ( const match of matches ) {
const text = match[ 0 ].trim();
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
/**
* External dependencies
*/
import { dispatch } from '@wordpress/data';
import debugFactory from 'debug';
/**
* Types
*/
import { BreveDispatch, SpellingDictionaryContext } from '../types';

const debug = debugFactory( 'jetpack-ai-breve:fetch-feature-data' );

type GetFeatureDataParams = {
feature: string;
language: string;
lastRequested?: string;
};

const briefData: {
// feature
[ key: string ]: {
// language
[ key: string ]: {
loading: boolean;
failed: boolean;
verified?: boolean;
data?: Array< string > | SpellingDictionaryContext;
};
};
} = {};

// Clear the local storage for the spelling context's old key. TODO: remove this after a few releases
localStorage.removeItem( 'jetpack-ai-breve-spelling-context-en' );

async function fetchFromServer( {
feature,
language,
lastRequested,
}: GetFeatureDataParams ): Promise< {
requestTime: string;
data: Array< string > | SpellingDictionaryContext | null;
} > {
// Randomize the server to balance the load
const counter = Math.floor( Math.random() * 3 );
const url = `https://s${ counter }.wp.com/wp-content/lib/jetpack-ai/breve-dictionaries/${ feature }/${ language }.json`;

// If we have a lastRequested date, first send a HEAD request to check if the data has been modified
// The If-Modified-Since header causes a CORS preflight request error, so we need to check manually
if ( lastRequested ) {
const headData = await fetch( url, { method: 'HEAD' } );
const lastModified = headData.headers.get( 'last-modified' );
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The etag should be more precise, but it is always null for some reason, even though I can see it in the browser tools 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's because CORS requests have limited headers that are accessible. If we can set access-control-expose-headers server side (not sure how feasible this is) then we can add etag there and will have access to it.


if ( ! lastModified ) {
throw new Error( 'Failed to fetch metadata' );
}

if ( new Date( lastRequested ) >= new Date( lastModified ) ) {
// If the data has not been modified, return null
return null;
}
}

const requestTime = new Date().toUTCString();
const data = await fetch( url );

if ( data.status === 404 ) {
throw new Error( 'The requested data does not exist' );
} else if ( data.status !== 200 ) {
throw new Error( 'Failed to fetch data' );
}

return {
requestTime,
data: await data.json(),
};
}

async function fetchFeatureData( { feature, language, lastRequested }: GetFeatureDataParams ) {
debug( 'Fetching feature data for type: %s. language: %s', feature, language );

const { setDictionaryLoading } = dispatch( 'jetpack/ai-breve' ) as BreveDispatch;

briefData[ feature ][ language ].loading = true;

setDictionaryLoading( feature, true );

try {
const fetchedData = await fetchFromServer( { feature, language, lastRequested } );
briefData[ feature ][ language ].verified = true;

if ( ! fetchedData ) {
debug( 'Data not modified', feature, language );
return;
}

const { requestTime, data } = fetchedData;

debug( 'Loaded data from server', feature, language );

// Cache the data in memory
briefData[ feature ][ language ].data = data;

// Cache the data in local storage
localStorage.setItem(
`jetpack-ai-breve-data-${ feature }-${ language }`,
JSON.stringify( { requestTime, data } )
);
} catch ( error ) {
debug( 'Failed to fetch feature data context', error );
briefData[ feature ][ language ].failed = true;
} finally {
briefData[ feature ][ language ].loading = false;
setDictionaryLoading( feature, false );
}
}

export default function getFeatureData( { feature, language }: GetFeatureDataParams ) {
// Initialize the feature data in memory, if it's not already defined
briefData[ feature ] = briefData[ feature ] || {};
briefData[ feature ][ language ] = briefData[ feature ][ language ] || {
loading: false,
failed: false,
verified: false,
};

const { loading, failed, data, verified } = briefData[ feature ]?.[ language ] ?? {
loading: false,
failed: false,
};

// First check if the data is already loaded
if ( data ) {
return data;
}

if ( loading ) {
return null;
}

// Check if the feature data is already defined in local storage
const storedData = localStorage.getItem( `jetpack-ai-breve-data-${ feature }-${ language }` );
let lastRequested: string;

if ( storedData ) {
try {
const { requestTime, data: parsedData } = JSON.parse( storedData );

// If the data is verified or if requesting failed once, return the data we have. TODO: handle retries
if ( verified || failed ) {
debug( 'Loaded data from local storage', feature, language );

// Cache the data in memory
briefData[ feature ][ language ].data = parsedData ?? null;
return parsedData;
}

// Set the last requested time to check if the data has been modified
lastRequested = requestTime;
} catch ( error ) {
debug( 'Failed to parse data from local storage', feature, language, error );
// If we failed to parse the data, remove it from local storage, as it's likely corrupted
localStorage.removeItem( `jetpack-ai-breve-data-${ feature }-${ language }` );
}
}

// If the request failed once, don't try again. TODO: handle retries
if ( ! failed ) {
fetchFeatureData( { feature, language, lastRequested } );
}

// Return null if the data is not loaded yet, as the function is synchronous and will be called again
return null;
}
Loading