import json

import manticoresearch
from manticoresearch.rest import ApiException

# Defining the host is optional and defaults to http://127.0.0.1:9308
configuration = manticoresearch.Configuration(host="http://127.0.0.1:9308")


def _decode_content(content: str) -> str:
    """Expand literal ``\\uXXXX`` escape sequences without corrupting real text.

    The previous ``content.encode('utf-8').decode('unicode_escape')`` garbled
    non-ASCII (e.g. Hebrew) content: the unicode_escape codec reinterprets the
    UTF-8 bytes as Latin-1, producing mojibake.  Escaping non-ASCII characters
    first (``backslashreplace``) keeps genuine Unicode intact while still
    decoding any embedded backslash escapes.

    NOTE(review): assumes the server returns strings containing literal
    ``\\uXXXX`` sequences — confirm against actual data; if content is already
    clean Unicode this is a no-op.
    """
    return content.encode("ascii", "backslashreplace").decode("unicode_escape")


def main() -> None:
    """Page through the ``big`` table in batches and dump all rows to data.json."""
    # Enter a context with an instance of the API client
    with manticoresearch.ApiClient(configuration) as api_client:
        # Create an instance of the API class
        api_instance = manticoresearch.UtilsApi(api_client)

        limit = 30               # Number of records per batch
        max_matches = 1700000    # Adjust this to a higher value depending on your server's capacity
        total_records = 1519237  # Total number of records to retrieve
        progress = 0             # Progress counter (rows processed so far)

        try:
            result_data = []
            for offset in range(0, total_records, limit):
                body = (
                    f"SELECT * FROM big LIMIT {limit} OFFSET {offset} "
                    f"OPTION max_matches={max_matches}"
                )
                # Get the first entry in the raw response; it holds the result set.
                api_response = api_instance.sql(body, raw_response=True)[0]
                for entry in api_response["data"]:
                    result_data.append({
                        "ID": entry["id"],
                        "Content": _decode_content(entry["content"]),
                        "SDate": entry["sdate"],
                    })
                    progress += 1  # Update the progress here to keep track

            # Once all records are processed, display the final progress
            print(
                f"Processed {progress}/{total_records} records - "
                f"Overall progress: {(progress / total_records) * 100:.2f}%"
            )

            # Save the data to a JSON file.
            # `ensure_ascii=False` to preserve the original (Hebrew) characters.
            with open("data.json", "w", encoding="utf-8") as json_file:
                json.dump(result_data, json_file, indent=4, ensure_ascii=False)
            print("Data has been successfully saved to data.json")
        # Broad catch is deliberate: this is the script's top-level boundary and
        # the original best-effort behavior (print and exit) is preserved.
        except Exception as e:
            print("Exception when calling UtilsApi->sql: %s\n" % e)


if __name__ == "__main__":
    main()