From 5e62525760e63b4528096ab6754f56fe28e15844 Mon Sep 17 00:00:00 2001
From: QuartzLibrary <81446760+QuartzLibrary@users.noreply.github.com>
Date: Fri, 3 Apr 2026 05:09:21 +0100
Subject: [PATCH 1/3] Update settings.json

---
 .vscode/settings.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9463976..e85d955 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,3 @@
 {
-  "rust-analyzer.cargo.features": ["cli"]
-}
+  "rust-analyzer.cargo.features": "all",
+}
\ No newline at end of file

From 3748b29877863364ce1e38b1856ea26986c1b506 Mon Sep 17 00:00:00 2001
From: QuartzLibrary <81446760+QuartzLibrary@users.noreply.github.com>
Date: Tue, 7 Apr 2026 01:50:42 +0100
Subject: [PATCH 2/3] READMEs

---
 README.md                      |   6 +-
 packages/python/README.md      | 108 +++++++++++++++++++++++++++++++++
 packages/python/pyproject.toml |   1 +
 schema_analysis/README.md      |   8 ++-
 4 files changed, 118 insertions(+), 5 deletions(-)
 create mode 100644 packages/python/README.md

diff --git a/README.md b/README.md
index cf75067..409a3f6 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,9 @@ our gymnast friend, serde.
 - Works with any self-describing format with a Serde implementation.
 - Suitable for large files.
 - Keeps track of some useful info for each type.
-- Keeps track of null/normal/missing/duplicate values separately.
+- Keeps track of null/missing/duplicate values separately.
 - Integrates with [Schemars](https://github.com/GREsau/schemars) and 
-  [json_typegen](https://github.com/evestera/json_typegen) to produce types and json schema if needed.
+  [json_typegen](https://github.com/evestera/json_typegen) to produce types and a json schema if needed.
 - There's a demo website [here](https://schema-analysis.com/).
 
 ### Installation
@@ -42,7 +42,7 @@ cargo install schema_analysis --features cli --locked
 
 ### CLI Usage
 
-The `schema_analysis` binary can infer schemas and generate types directly from the command line.
+`schema_analysis` can infer schemas and generate types from data directly from the command line.
 
 ```
 schema_analysis [OPTIONS] [FILES]...
diff --git a/packages/python/README.md b/packages/python/README.md
new file mode 100644
index 0000000..5fad526
--- /dev/null
+++ b/packages/python/README.md
@@ -0,0 +1,108 @@
+# schema_analysis
+
+## Universal-ish Schema Analysis
+
+Ever wished you could figure out what was in that json file? Or maybe it was xml... Ehr, yaml?
+It was definitely toml.
+
+Alas, many great tools will only work with one of those formats, and the internet is not so
+nice a place as to finally understand that no, xml is not an acceptable data format.
+
+Enter this neat little tool, a single interface to any self-describing format supported by
+our gymnast friend, serde.
+
+### Features
+
+- Works with any self-describing format with a Serde implementation.
+- Suitable for large files.
+- Keeps track of some useful info for each type (opt out with --no-analysis).
+- Keeps track of null/missing/duplicate values separately.
+- Integrates with [Schemars](https://github.com/GREsau/schemars) and 
+  [json_typegen](https://github.com/evestera/json_typegen) to produce types and a json schema if needed.
+- There's a demo website [here](https://schema-analysis.com/).
+
+### Installation
+
+```bash
+# Run without installing
+uvx schema_analysis data.json
+# or
+pipx run schema_analysis data.json
+
+# Install
+pip install schema_analysis
+# or
+uv tool install schema_analysis
+# or
+cargo install schema_analysis --features cli --locked
+```
+
+### CLI Usage
+
+`schema_analysis` can infer schemas and generate types from data directly from the command line.
+
+```
+schema_analysis [OPTIONS] [FILES]...
+```
+
+It auto-detects the input format from file extensions (`.json`, `.yaml`/`.yml`, `.xml`, `.toml`, `.cbor`, `.bson`)
+and reads from stdin if no files are provided.
+
+**Options:**
+
+| Option | Description | Default |
+| --- | --- | --- |
+| `--format <FORMAT>` | Override input format (`json`, `yaml`, `xml`, `toml`, `cbor`, `bson`) | auto-detected |
+| `--output <OUTPUT>` | Output mode (`schema`, `rust`, `typescript`, `typescript-alias`, `kotlin`, `kotlin-kotlinx`, `json-schema`, `shape`) | `schema` |
+| `--name <NAME>` | Root type name for code generation | `Root` |
+| `--compact` | Compact JSON output (no pretty printing) | |
+| `--no-analysis` | Skip analysis info (counts, samples, min/max, etc.), outputting only the schema structure | |
+
+**Examples:**
+
+```bash
+# Infer a schema from a JSON file
+schema_analysis data.json
+
+# Generate Rust types
+schema_analysis data.json --output rust --name MyData
+
+# Generate TypeScript interfaces
+schema_analysis api.json --output typescript --name ApiResponse
+
+# Generate JSON Schema
+schema_analysis data.json --output json-schema
+
+# Merge multiple files into a single schema
+schema_analysis file1.json file2.json file3.json
+
+# Read from stdin
+cat data.json | schema_analysis --format json
+```
+
+### Library Usage
+
+For use as a library, see the [Rust crate](https://crates.io/crates/schema_analysis/) or the [repo](https://github.com/QuartzLibrary/schema_analysis).
+
+### Performance
+
+> These are not proper benchmarks, but should give a vague idea of the performance on an i7-7700HQ laptop (2017) with the raw data already loaded into memory.
+
+| Size                  | wasm (MB/s)  | native (MB/s) | Format | File # |
+| --------------------- | ------------ | ------------- | ------ | ------ |
+| [~180MB]              | ~20s (9)     | ~5s (36)      | json   | 1      |
+| [~650MB]              | ~150s (4.3)  | ~50s (13)     | json   | 1      |
+| [~1.7GB]              | ~470s (3.6)  | ~145s (11.7)  | json   | 1      |
+| [~2.1GB]              | <sup>a</sup> | ~182s (11.5)  | json   | 1      |
+| [~13.3GB]<sup>b</sup> |              | ~810s (16.4)  | xml    | ~200k  |
+
+<sup>a</sup> This one seems to go over some kind of browser limit when fetching the data in the Web Worker; I believe I would have to split large files to handle it.
+
+<sup>b</sup> ~2.7GB compressed. This one seems like it would be a worst-case scenario because it includes decompression overhead and the files had a section that was formatted text which resulted in crazy schemas. (The json pretty printed schema was almost 0.5GB!)
+
+
+[~180MB]: https://github.com/zemirco/sf-city-lots-json/blob/master/citylots.json
+[~650MB]: https://catalog.data.gov/dataset/forestry-planting-spaces
+[~1.7GB]: https://catalog.data.gov/dataset/nys-thruway-origin-and-destination-points-for-all-vehicles-15-minute-intervals-2018-q4
+[~2.1GB]: https://catalog.data.gov/dataset/turnstile-usage-data-2016
+[~13.3GB]: https://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_bulk/
diff --git a/packages/python/pyproject.toml b/packages/python/pyproject.toml
index 720a009..f70173a 100644
--- a/packages/python/pyproject.toml
+++ b/packages/python/pyproject.toml
@@ -6,6 +6,7 @@ build-backend = "maturin"
 name = "schema_analysis"
 version = "0.6.0"
 description = "Infer schemas from JSON, YAML, XML, TOML, CBOR, and BSON"
+readme = "README.md"
 license = { text = "MIT OR Apache-2.0" }
 requires-python = ">=3.8"
 authors = [{ name = "QuartzLibrary" }]
diff --git a/schema_analysis/README.md b/schema_analysis/README.md
index d9123f8..2747cc1 100644
--- a/schema_analysis/README.md
+++ b/schema_analysis/README.md
@@ -19,9 +19,9 @@ our gymnast friend, serde.
 - Works with any self-describing format with a Serde implementation.
 - Suitable for large files.
 - Keeps track of some useful info for each type.
-- Keeps track of null/normal/missing/duplicate values separately.
+- Keeps track of null/missing/duplicate values separately.
 - Integrates with [Schemars](https://github.com/GREsau/schemars) and 
-  [json_typegen](https://github.com/evestera/json_typegen) to produce types and json schema if needed.
+  [json_typegen](https://github.com/evestera/json_typegen) to produce types and a json schema if needed.
 - There's a demo website [here](https://schema-analysis.com/).
 
 ### Usage
@@ -52,6 +52,10 @@ Check [Schema](https://docs.rs/schema_analysis/latest/schema_analysis/enum.Schem
 to see what info you get, and [targets](https://github.com/QuartzLibrary/schema_analysis/blob/HEAD/schema_analysis/src/targets) 
 to see the available integrations (which include code and json schema generation).
 
+### CLI Usage
+
+You can use this crate as a CLI; more info is available in the [repo](https://github.com/QuartzLibrary/schema_analysis).
+
 ### Advanced Usage
 
 I know, I know, the internet is evil and has decided to plague you with not one, but thousands,

From 1e929c478b3eb926adb852246bd8b2b787b271c5 Mon Sep 17 00:00:00 2001
From: QuartzLibrary <81446760+QuartzLibrary@users.noreply.github.com>
Date: Wed, 8 Apr 2026 17:53:04 +0100
Subject: [PATCH 3/3] Rename CLI flag

---
 README.md                                                   | 2 +-
 packages/python/README.md                                   | 4 ++--
 schema_analysis/src/main.rs                                 | 4 ++--
 schema_analysis/tests/cli.rs                                | 6 +++---
 ...son_schema_no_analysis.json => json_schema_minimal.json} | 0
 5 files changed, 8 insertions(+), 8 deletions(-)
 rename schema_analysis/tests/cli_fixtures/expected/{json_schema_no_analysis.json => json_schema_minimal.json} (100%)

diff --git a/README.md b/README.md
index 409a3f6..8a732c1 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@ and reads from stdin if no files are provided.
 | `--output <OUTPUT>` | Output mode (`schema`, `rust`, `typescript`, `typescript-alias`, `kotlin`, `kotlin-kotlinx`, `json-schema`, `shape`) | `schema` |
 | `--name <NAME>` | Root type name for code generation | `Root` |
 | `--compact` | Compact JSON output (no pretty printing) | |
-| `--no-analysis` | Skip analysis info (counts, samples, min/max, etc.), outputting only the schema structure | |
+| `--minimal` | Skip analysis info (counts, samples, min/max, etc.), outputting only the schema structure | |
 
 **Examples:**
 
diff --git a/packages/python/README.md b/packages/python/README.md
index 5fad526..1b49ea5 100644
--- a/packages/python/README.md
+++ b/packages/python/README.md
@@ -15,7 +15,7 @@ our gymnast friend, serde.
 
 - Works with any self-describing format with a Serde implementation.
 - Suitable for large files.
-- Keeps track of some useful info for each type (opt out with --no-analysis).
+- Keeps track of some useful info for each type (opt out with --minimal).
 - Keeps track of null/missing/duplicate values separately.
 - Integrates with [Schemars](https://github.com/GREsau/schemars) and 
   [json_typegen](https://github.com/evestera/json_typegen) to produce types and a json schema if needed.
@@ -56,7 +56,7 @@ and reads from stdin if no files are provided.
 | `--output <OUTPUT>` | Output mode (`schema`, `rust`, `typescript`, `typescript-alias`, `kotlin`, `kotlin-kotlinx`, `json-schema`, `shape`) | `schema` |
 | `--name <NAME>` | Root type name for code generation | `Root` |
 | `--compact` | Compact JSON output (no pretty printing) | |
-| `--no-analysis` | Skip analysis info (counts, samples, min/max, etc.), outputting only the schema structure | |
+| `--minimal` | Skip analysis info (counts, samples, min/max, etc.), outputting only the schema structure | |
 
 **Examples:**
 
diff --git a/schema_analysis/src/main.rs b/schema_analysis/src/main.rs
index f6bced2..70d1a57 100644
--- a/schema_analysis/src/main.rs
+++ b/schema_analysis/src/main.rs
@@ -39,7 +39,7 @@ struct Cli {
 
     /// Only output the schema structure, without analysis info (counts, samples, min/max, etc.)
     #[arg(long)]
-    no_analysis: bool,
+    minimal: bool,
 }
 
 #[derive(Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
@@ -73,7 +73,7 @@ fn main() -> Result<()> {
     }
     let format = resolve_format(&cli)?;
 
-    let output = if cli.no_analysis {
+    let output = if cli.minimal {
         let mut schema = infer_schema::<()>(format, &cli.files)?;
         if format == InputFormat::Xml {
             cleanup_xml_schema(&mut schema);
diff --git a/schema_analysis/tests/cli.rs b/schema_analysis/tests/cli.rs
index e09d892..fa83e46 100644
--- a/schema_analysis/tests/cli.rs
+++ b/schema_analysis/tests/cli.rs
@@ -100,14 +100,14 @@ fn compact_flag() {
 }
 
 #[test]
-fn no_analysis_flag() {
+fn minimal_flag() {
     cmd()
         .arg(input("sample.json"))
-        .arg("--no-analysis")
+        .arg("--minimal")
         .assert()
         .success()
         .stdout(include_str!(
-            "cli_fixtures/expected/json_schema_no_analysis.json"
+            "cli_fixtures/expected/json_schema_minimal.json"
         ));
 }
 
diff --git a/schema_analysis/tests/cli_fixtures/expected/json_schema_no_analysis.json b/schema_analysis/tests/cli_fixtures/expected/json_schema_minimal.json
similarity index 100%
rename from schema_analysis/tests/cli_fixtures/expected/json_schema_no_analysis.json
rename to schema_analysis/tests/cli_fixtures/expected/json_schema_minimal.json