diff --git a/README.md b/README.md index 6ce3cfc..1df0ccb 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,15 @@ - Handles both PHP and JSON files - Model translations coming soon +## Requirements + +PHP 8.1 Is the minimum for the OpenAI Wrapper. + +Tested with Laravel ^9.45 and 10.x + +Filament is not required (but can add to their already awesome set of translations.) + + ## Installation Download the package via composer: @@ -46,6 +55,8 @@ Currently handles `.json` and `.php` translation files. Attribute tokens should remain unchanged. This was a challenge as GPT-3.5 refused to ignore :attribute and always translated it even when told explicity to ignore it. To solve this all tokens are replaced with *** before translation and added back in after translation ensuring continuity. +![php artisan vb:ai:translate --help](./media/help.png) + To run the script:- ```bash @@ -56,6 +67,8 @@ The script will scan all the source_directories for translation files and then p If you're happy to continue, select the model you wish to use and each source file will be translated. +![php artisan vb:ai:translate](./media/screenshot.png) + If the target file exists, only new keys not in the target will be translated. Unless you use the --force option which will overwrite any existing translations. diff --git a/config/ai-translate.php b/config/ai-translate.php index 52e9c10..8651598 100644 --- a/config/ai-translate.php +++ b/config/ai-translate.php @@ -14,6 +14,7 @@ //But do not include it here. These are the parent directories. 'source_directories' => [ 'lang', + 'resources/lang', //Developers can point to their packages to have them translated //Either in vendor or where ever they happen to be 'vendor/visualbuilder/email-templates/resources/lang', diff --git a/media/help.png b/media/help.png new file mode 100644 index 0000000..60a6f1a Binary files /dev/null and b/media/help.png differ diff --git a/media/screenshot.png b/media/screenshot.png index 0f90bb3..89c964b 100644 Binary files a/media/screenshot.png and b/media/screenshot.png differ diff --git a/src/Console/TranslateStrings.php b/src/Console/TranslateStrings.php index 00a6597..5625732 100644 --- a/src/Console/TranslateStrings.php +++ b/src/Console/TranslateStrings.php @@ -24,53 +24,66 @@ class TranslateStrings extends Command * * @var string */ - protected $description = 'Translates all language files'; - - protected $sourceData = []; - - protected $sourceLocale = ""; - protected $model = ""; - protected $maxInputStringLength = 2000; - protected $maxRetries = 5; - protected $totalPromptTokens = 0; + protected $description = "Translates all language files in these directories: "; + protected $model = ""; + protected $maxInputStringLength = 2000; + protected $maxRetries = 5; + protected $overwrite = false; + protected $sourceData = []; + protected $sourceLocale = ""; + protected $totalPromptTokens = 0; protected $totalCompletionTokens = 0; - - protected $overwrite = false; - + + public function __construct() { + parent::__construct(); + + $dirs = [ + 'source_directories' => [ + 'lang', + 'resources/lang', + ], + ]; + + $dirString = implode("\n", $dirs[ 'source_directories' ]); + + $this->signature = $dirString; + $this->setDescription( + (string) "Translates all PHP & JSON language files in these directories: \n\n\t- ".implode( + "\n\t- ", config('ai-translate.source_directories') + )."\n\n Source Locale: ".config('ai-translate.source-locale')."\n\n Target Languages: \n\t- ".implode( + "\n\t- ", config('ai-translate.target_locales') + ) + ); + } + /** * Execute the console command. * * @return int */ - public function handle() - { - + public function handle() { $this->init(); $files = $this->findSourceFiles(); $this->translateFiles($files); - + return Command::SUCCESS; } - - private function init() - { + + private function init() { $this->sourceLocale = config('ai-translate.source-locale'); $this->overwrite = (bool) $this->option('force'); } - - private function findSourceFiles() - { + + private function findSourceFiles() { $jsonFiles = FileHelper::getJsonSourceFileList($this->sourceLocale); $phpFiles = FileHelper::getLanguageFileList($this->sourceLocale); - + return array_merge($jsonFiles, $phpFiles); } - - private function translateFiles($files) - { - + + private function translateFiles($files) { $this->estimateCostAndSelectModel($files); - + foreach ($files as $sourceFile) { $this->sourceData = $this->readLanguageFile($sourceFile); $chunks = $this->chunkArray($this->sourceData); @@ -78,137 +91,37 @@ private function translateFiles($files) $this->handleTargetLocale($sourceFile, $chunks, $targetLocale, $targetLanguage); } } - + $this->newLine(); $this->info("Total prompt tokens used: ".$this->totalPromptTokens); $this->info("Total completion tokens used: ".$this->totalCompletionTokens); $this->warn("Total Cost: $".$this->getCost()); } - - /** - * Deliberately keeping the chunks smaller to avoid timeout issues with long inputs. - * With GPT-3.5 this equates to 2700 characters input per request - * @return void - */ - private function setMaxInputStringLength() - { - $maxTokens = config('ai-translate.ai-models')[ $this->model ][ 'max_tokens' ]; - $maxCharactersTotal = $maxTokens * 4; - $this->maxInputStringLength = round($maxCharactersTotal / 6, 0); - } - - /** - * Create the target file and process the input chunked array - * - * @param $file - * @param $chunks - * @param $targetLocale - * @param $targetLanguage - * - * @return void - */ - private function handleTargetLocale($file, $chunks, $targetLocale, $targetLanguage) - { - if ($targetFile = $this->createCheckOrEmptyTargetFile($file, $targetLocale)) { - $this->handleExistingTargetFile($targetFile, $chunks); - - foreach ($chunks as $chunk) { - $this->processChunk($targetFile, $chunk, $targetLanguage); - } - } - } - - /** - * If the file contains records - diff with the source to only translate new strings. - * @param $targetFile - * @param $chunks - * - * @return void - */ - private function handleExistingTargetFile($targetFile, &$chunks) - { - $targetArray = $this->readLanguageFile($targetFile); - if (count($targetArray) !== 0 && ! $this->overwrite) { - $diffArray = array_diff_key($this->sourceData, $targetArray); - $chunks = $this->chunkArray($diffArray); - } - } - - /** - * Gracefully try translation, with some retries if fails and save the result to the target - * @param $targetFile - * @param $chunk - * @param $targetLanguage - * - * @return void - */ - private function processChunk($targetFile, $chunk, $targetLanguage) - { - $retryCount = 0; - $complete = false; - while ($retryCount < $this->maxRetries && ! $complete) { - try { - $translatedChunk = OpenAiHelper::translateChunk($this, $chunk, $this->sourceLocale, $targetLanguage, $this->model); - $this->appendResponse($targetFile, $translatedChunk['translatedChunk']); - $complete = true; - } catch (\Exception $e) { - $this->handleRetry($retryCount, $e, $targetFile); - } - } - $this->handleRetryFailure($retryCount, $targetFile); - } - - private function handleRetry(&$retryCount, $exception, $targetFile) - { - $retryCount++; - $this->error('An error occurred while processing the file: ' . $targetFile); - $this->error('Error message: ' . $exception->getMessage()); - $this->info("Retrying $retryCount / $this->maxRetries Times in " . pow(2, $retryCount) . ' seconds'); - usleep(pow(2, $retryCount) * 1000000); // Wait for 2^retryCount seconds - } - - private function handleRetryFailure($retryCount, $targetFile) - { - if ($retryCount === $this->maxRetries) { - $this->error("Request failed after ".$this->maxRetries." retries"); - if (file_exists($targetFile)) { - unlink($targetFile); - } - } - } - - private function getCost() - { - $cost = ($this->totalPromptTokens / 1000) * config('ai-translate.ai-models')[ $this->model ][ 'input_price' ]; - $cost += ($this->totalCompletionTokens / 1000) * config('ai-translate.ai-models')[ $this->model ][ 'output_price' ]; - - return $cost; - } - + /** * Parse the input files and estimate the cost of translation * Ask the user which model they would like to use if they have not specified * * Set the model and the maximum input string length for the model + * * @param $files * @param $files * * @return true */ - private function estimateCostAndSelectModel($files) - { + private function estimateCostAndSelectModel($files) { $targetLanguages = config('ai-translate.target_locales'); $targetCount = count($targetLanguages); - + $this->info("Found ".count($files)." files to translate into $targetCount languages"); - + $stats = []; $totalItems = 0; $totalLength = 0; $sourceTokensTotal = 0; $targetTokensTotal = 0; $totalTokens = 0; - + foreach ($files as $file) { $lengths = FileHelper::countItemsAndStringLengths($file); $sourceTokens = OpenAiHelper::estimateTokens($lengths[ 'totalLength' ]); @@ -219,9 +132,9 @@ private function estimateCostAndSelectModel($files) $sourceTokens, $sourceTokens * $targetCount, $sourceTokens + ($sourceTokens * $targetCount), - + ]; - + // Accumulate totals $totalItems += $lengths[ 'itemCount' ]; $totalLength += $lengths[ 'totalLength' ]; @@ -229,20 +142,21 @@ private function estimateCostAndSelectModel($files) $targetTokensTotal += $sourceTokens * $targetCount; $totalTokens += $sourceTokens + ($sourceTokens * $targetCount); } - + // Append totals to stats $stats[] = ['Total', $totalItems, $totalLength, $sourceTokensTotal, $targetTokensTotal, $totalTokens]; - + $this->table( - ['File', 'Lines', 'Total String Length', 'Source Tokens', 'Target Tokens', 'Total Tokens'], - $stats + ['File', 'Lines', 'Total String Length', 'Source Tokens', 'Target Tokens', 'Total Tokens'], $stats ); - - $this->info('Cost Estimations per AI Model'); - + + $this->newLine(); + $this->info('Cost Estimations per AI model for '.number_format($totalTokens,0,"",",").' tokens'); + $totals = end($stats); - + $data = []; + foreach (config('ai-translate.ai-models') as $key => $model) { $data[] = [ $key, @@ -250,45 +164,54 @@ private function estimateCostAndSelectModel($files) round(($totals[ 4 ] / 1000) * $model[ 'output_price' ], 2), round( (($totals[ 3 ] / 1000) * $model[ 'input_price' ]) + (($totals[ 4 ] / 1000) - * $model[ 'output_price' ]), - 2 + * $model[ 'output_price' ]), 2 ), ]; } - + $this->table( - ['Model', 'Input Cost ($)', 'Output Cost ($)', 'Total Cost ($)'], - $data + ['Model', 'Input Cost ($)', 'Output Cost ($)', 'Total Cost ($)'], $data ); $this->warn('Please notes costs are estimations only, prices will vary subject to the destination languages translated'); - - if(! $this->option('cheapest') && ! $this->option('best')) { + + if(!$this->option('cheapest') && !$this->option('best')) { $model = $this->choice( - 'Which model would you like to use?', - array_keys(config('ai-translate.ai-models')), - array_keys(config('ai-translate.ai-models'))[ 0 ] + 'Which model would you like to use?', array_keys(config('ai-translate.ai-models')), array_keys(config('ai-translate.ai-models'))[ 0 ] ); - } else { + } + else { $models = config('ai-translate.ai-models'); $model = $this->option('cheapest') ? array_keys(config('ai-translate.ai-models'))[ 0 ] : end($models); } - + $this->info("Going to translate using $model"); $this->model = $model; $this->setMaxInputStringLength(); - + return true; } - + + /** + * Deliberately keeping the chunks smaller to avoid timeout issues with long inputs. + * With GPT-3.5 this equates to 2700 characters input per request + * + * @return void + */ + private function setMaxInputStringLength() { + $maxTokens = config('ai-translate.ai-models')[ $this->model ][ 'max_tokens' ]; + $maxCharactersTotal = $maxTokens * 4; + $this->maxInputStringLength = round($maxCharactersTotal / 6, 0); + } + /** * They should always be an array so we can just include it. + * * @param $file * * @return mixed */ - public function readLanguageFile($file) - { - if(! file_exists($file)) { + public function readLanguageFile($file) { + if(!file_exists($file)) { return []; } switch (FileHelper::getExtention($file)) { @@ -298,7 +221,7 @@ public function readLanguageFile($file) return json_decode(file_get_contents($file), true); } } - + /** * Splits a file into chunks of a given length * @@ -307,23 +230,22 @@ public function readLanguageFile($file) * * @return array The chunks */ - public function chunkArray($array) - { + public function chunkArray($array) { $chunks = []; $chunk = []; $length = 0; - + // Flatten the array using Laravel's helper function $flattenedArray = Arr::dot($array); - + foreach ($flattenedArray as $key => $value) { // If the value is an empty array, skip to the next iteration if(is_array($value) && empty($value)) { continue; } - + $itemLength = strlen($value); - if($length + $itemLength > $this->maxInputStringLength && ! empty($chunk)) { + if($length + $itemLength > $this->maxInputStringLength && !empty($chunk)) { // If adding this item will exceed the max length, save the current chunk and start a new one $chunks[] = $chunk; $chunk = []; @@ -332,77 +254,161 @@ public function chunkArray($array) $chunk[ $key ] = $value; $length += $itemLength; } - if(! empty($chunk)) { + if(!empty($chunk)) { // Add the last chunk if it's not empty $chunks[] = $chunk; } - + return $chunks; } - - public function createCheckOrEmptyTargetFile($file, $locale) - { + + /** + * Create the target file and process the input chunked array + * + * @param $file + * @param $chunks + * @param $targetLocale + * @param $targetLanguage + * + * @return void + */ + private function handleTargetLocale($file, $chunks, $targetLocale, $targetLanguage) { + if($targetFile = $this->createCheckOrEmptyTargetFile($file, $targetLocale)) { + $this->handleExistingTargetFile($targetFile, $chunks); + + foreach ($chunks as $chunk) { + $this->processChunk($targetFile, $chunk, $targetLanguage); + } + } + } + + public function createCheckOrEmptyTargetFile($file, $locale) { // Replace source locale with target locale in the path - switch(FileHelper::getExtention($file)) { + switch (FileHelper::getExtention($file)) { case('php'): //php files are in their own locale dir $newFile = str_replace('/'.$this->sourceLocale.'/', '/'.$locale.'/', $file); - + break; case('json'): //Json files are in the same dir $newFile = str_replace($this->sourceLocale.'.json', $locale.'.json', $file); } - - + // If overwrite is false and file already exists, make sure it contains an array - if(! $this->overwrite && file_exists($newFile)) { + if(!$this->overwrite && file_exists($newFile)) { return is_array($this->readLanguageFile($newFile)) ? $newFile : false; } - + // Create directory structure if it doesn't exist $directory = dirname($newFile); - if(! file_exists($directory)) { + if(!file_exists($directory)) { mkdir($directory, 0755, true); } - - switch(FileHelper::getExtention($file)) { + + switch (FileHelper::getExtention($file)) { case('php'): - $comment = "/**\n * Auto Translated by visualbuilder/ai-translate on " . date("d/m/Y") . "\n */"; - file_put_contents($newFile, "readLanguageFile($targetFile); + if(count($targetArray) !== 0 && !$this->overwrite) { + $diffArray = array_diff_key($this->sourceData, $targetArray); + $chunks = $this->chunkArray($diffArray); + } + } + + /** + * Gracefully try translation, with some retries if fails and save the result to the target + * + * @param $targetFile + * @param $chunk + * @param $targetLanguage + * + * @return void + */ + private function processChunk($targetFile, $chunk, $targetLanguage) { + $retryCount = 0; + $complete = false; + while ($retryCount < $this->maxRetries && !$complete) { + try { + $translatedChunk = + OpenAiHelper::translateChunk($this, $chunk, $this->sourceLocale, $targetLanguage, $this->model); + $this->appendResponse($targetFile, $translatedChunk[ 'translatedChunk' ]); + $complete = true; + } + catch (\Exception $e) { + $this->handleRetry($retryCount, $e, $targetFile); + } + } + $this->handleRetryFailure($retryCount, $targetFile); + } + + public function appendResponse($filename, $translatedChunk) { // Fetch existing content $existingContent = $this->readLanguageFile($filename); - + // Undot the translated chunk $undottedTranslatedChunk = Arr::undot($translatedChunk); - + // Merge new translations with existing content $newContent = array_merge($existingContent, $undottedTranslatedChunk); - - $comment = "/**\n * Auto Translated by visualbuilder/ai-translate on " . date("d/m/Y") . "\n */"; - + + $comment = "/**\n * Auto Translated by visualbuilder/ai-translate on ".date("d/m/Y")."\n */"; + switch (FileHelper::getExtention($filename)) { case('php'): - $output = "error('An error occurred while processing the file: '.$targetFile); + $this->error('Error message: '.$exception->getMessage()); + $this->info("Retrying $retryCount / $this->maxRetries Times in ".pow(2, $retryCount).' seconds'); + usleep(pow(2, $retryCount) * 1000000); // Wait for 2^retryCount seconds + } + + private function handleRetryFailure($retryCount, $targetFile) { + if($retryCount === $this->maxRetries) { + $this->error("Request failed after ".$this->maxRetries." retries"); + if(file_exists($targetFile)) { + unlink($targetFile); + } + } + } + + private function getCost() { + $cost = ($this->totalPromptTokens / 1000) * config('ai-translate.ai-models')[ $this->model ][ 'input_price' ]; + $cost += ($this->totalCompletionTokens / 1000) + * config('ai-translate.ai-models')[ $this->model ][ 'output_price' ]; + + return $cost; + } + /** * Recursively merges arrays * @@ -411,16 +417,16 @@ public function appendResponse($filename, $translatedChunk) * * @return array The merged array */ - private function mergeArray($array1, $array2) - { + private function mergeArray($array1, $array2) { foreach ($array2 as $key => $value) { if(array_key_exists($key, $array1) && is_array($array1[ $key ]) && is_array($value)) { $array1[ $key ] = $this->mergeArray($array1[ $key ], $value); - } else { + } + else { $array1[ $key ] = $value; } } - + return $array1; } }