import json

import manticoresearch
from manticoresearch.rest import ApiException

# Defining the host is optional and defaults to http://127.0.0.1:9308
configuration = manticoresearch.Configuration(host="http://127.0.0.1:9308")


def _decode_content(content: str) -> str:
    """Expand literal ``\\uXXXX`` escape sequences without corrupting real text.

    The previous ``content.encode('utf-8').decode('unicode_escape')`` garbled
    non-ASCII (e.g. Hebrew) content: the unicode_escape codec reinterprets the
    UTF-8 bytes as Latin-1, producing mojibake.  Escaping non-ASCII characters
    first (``backslashreplace``) keeps genuine Unicode intact while still
    decoding any embedded backslash escapes.

    NOTE(review): assumes the server returns strings containing literal
    ``\\uXXXX`` sequences — confirm against actual data; if content is already
    clean Unicode this is a no-op.
    """
    return content.encode("ascii", "backslashreplace").decode("unicode_escape")


def main() -> None:
    """Page through the ``big`` table in batches and dump all rows to data.json."""
    # Enter a context with an instance of the API client
    with manticoresearch.ApiClient(configuration) as api_client:
        # Create an instance of the API class
        api_instance = manticoresearch.UtilsApi(api_client)

        limit = 30               # Number of records per batch
        max_matches = 1700000    # Adjust this to a higher value depending on your server's capacity
        total_records = 1519237  # Total number of records to retrieve
        progress = 0             # Progress counter (rows processed so far)

        try:
            result_data = []
            for offset in range(0, total_records, limit):
                body = (
                    f"SELECT * FROM big LIMIT {limit} OFFSET {offset} "
                    f"OPTION max_matches={max_matches}"
                )
                # Get the first entry in the raw response; it holds the result set.
                api_response = api_instance.sql(body, raw_response=True)[0]
                for entry in api_response["data"]:
                    result_data.append({
                        "ID": entry["id"],
                        "Content": _decode_content(entry["content"]),
                        "SDate": entry["sdate"],
                    })
                    progress += 1  # Update the progress here to keep track

            # Once all records are processed, display the final progress
            print(
                f"Processed {progress}/{total_records} records - "
                f"Overall progress: {(progress / total_records) * 100:.2f}%"
            )

            # Save the data to a JSON file.
            # `ensure_ascii=False` to preserve the original (Hebrew) characters.
            with open("data.json", "w", encoding="utf-8") as json_file:
                json.dump(result_data, json_file, indent=4, ensure_ascii=False)
            print("Data has been successfully saved to data.json")
        # Broad catch is deliberate: this is the script's top-level boundary and
        # the original best-effort behavior (print and exit) is preserved.
        except Exception as e:
            print("Exception when calling UtilsApi->sql: %s\n" % e)


if __name__ == "__main__":
    main()