Patch files not being passed properly
This commit is contained in:
parent
94d3a90438
commit
9035e00da3
3 changed files with 535 additions and 41 deletions
451
rust-engine/Cargo.lock
generated
451
rust-engine/Cargo.lock
generated
|
|
@ -2,6 +2,21 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "adobe-cmap-parser"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d3da9d617508ab8102c22f05bd772fc225ecb4fde431e38a45284e5c129a4bc"
|
||||
dependencies = [
|
||||
"pom 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.21"
|
||||
|
|
@ -31,7 +46,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -55,6 +70,12 @@ version = "1.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||
|
||||
[[package]]
|
||||
name = "base-x"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4cbbc9d0964165b47557570cce6c952866c2678457aca742aafc9fb771d30270"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.22.1"
|
||||
|
|
@ -85,6 +106,17 @@ dependencies = [
|
|||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.19.0"
|
||||
|
|
@ -154,6 +186,12 @@ version = "0.9.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
|
||||
|
||||
[[package]]
|
||||
name = "const_fn"
|
||||
version = "0.4.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f8a2ca5ac02d09563609681103aada9e1777d54fc57a5acd7a41404f9c93b6e"
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
|
|
@ -194,6 +232,15 @@ version = "2.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-queue"
|
||||
version = "0.3.12"
|
||||
|
|
@ -242,6 +289,12 @@ dependencies = [
|
|||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "discard"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "212d0f5754cb6769937f4501cc0e67f4f4483c8d2c3e1e922ee9edbe4ab4c7c0"
|
||||
|
||||
[[package]]
|
||||
name = "displaydoc"
|
||||
version = "0.2.5"
|
||||
|
|
@ -250,7 +303,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -268,6 +321,70 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding"
|
||||
version = "0.2.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
|
||||
dependencies = [
|
||||
"encoding-index-japanese",
|
||||
"encoding-index-korean",
|
||||
"encoding-index-simpchinese",
|
||||
"encoding-index-singlebyte",
|
||||
"encoding-index-tradchinese",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-japanese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-korean"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-simpchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-singlebyte"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding-index-tradchinese"
|
||||
version = "1.20141219.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
|
||||
dependencies = [
|
||||
"encoding_index_tests",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_index_tests"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.35"
|
||||
|
|
@ -304,6 +421,15 @@ dependencies = [
|
|||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "euclid"
|
||||
version = "0.20.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "5.4.1"
|
||||
|
|
@ -327,6 +453,16 @@ version = "0.1.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flume"
|
||||
version = "0.11.1"
|
||||
|
|
@ -426,7 +562,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -552,7 +688,7 @@ dependencies = [
|
|||
"http",
|
||||
"httpdate",
|
||||
"mime",
|
||||
"sha1",
|
||||
"sha1 0.10.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -946,6 +1082,12 @@ dependencies = [
|
|||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linked-hash-map"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8dd5a6d5999d9907cda8ed67bbd137d3af8085216c2ac62de5be860bd41f304a"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.11.0"
|
||||
|
|
@ -973,6 +1115,22 @@ version = "0.4.28"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
|
||||
|
||||
[[package]]
|
||||
name = "lopdf"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de0f69c40d6dbc68ebac4bf5aec3d9978e094e22e29fcabd045acd9cec74a9dc"
|
||||
dependencies = [
|
||||
"encoding",
|
||||
"flate2",
|
||||
"itoa",
|
||||
"linked-hash-map",
|
||||
"log",
|
||||
"pom 3.4.0",
|
||||
"time",
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru-slab"
|
||||
version = "0.1.2"
|
||||
|
|
@ -1011,6 +1169,16 @@ dependencies = [
|
|||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "1.1.0"
|
||||
|
|
@ -1141,7 +1309,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1191,6 +1359,22 @@ dependencies = [
|
|||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdf-extract"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0f21fc45e1b40af7e6c7ca32af35464c1ea7a92e5d2e1465d08c8389e033240"
|
||||
dependencies = [
|
||||
"adobe-cmap-parser",
|
||||
"encoding",
|
||||
"euclid",
|
||||
"linked-hash-map",
|
||||
"lopdf",
|
||||
"postscript",
|
||||
"type1-encoding-parser",
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pem-rfc7468"
|
||||
version = "0.7.0"
|
||||
|
|
@ -1223,7 +1407,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1265,6 +1449,27 @@ version = "0.3.32"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "pom"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6"
|
||||
|
||||
[[package]]
|
||||
name = "pom"
|
||||
version = "3.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c972d8f86e943ad532d0b04e8965a749ad1d18bb981a9c7b3ae72fe7fd7744b"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postscript"
|
||||
version = "0.14.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78451badbdaebaf17f053fd9152b3ffb33b516104eacb45e7864aaa9c712f306"
|
||||
|
||||
[[package]]
|
||||
name = "potential_utf"
|
||||
version = "0.1.3"
|
||||
|
|
@ -1283,6 +1488,12 @@ dependencies = [
|
|||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.20+deprecated"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.101"
|
||||
|
|
@ -1430,6 +1641,12 @@ dependencies = [
|
|||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.24"
|
||||
|
|
@ -1519,6 +1736,7 @@ dependencies = [
|
|||
"dotenvy",
|
||||
"futures-util",
|
||||
"lazy_static",
|
||||
"pdf-extract",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
|
@ -1537,6 +1755,15 @@ version = "2.1.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
|
||||
dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "1.1.2"
|
||||
|
|
@ -1641,6 +1868,21 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
|
||||
dependencies = [
|
||||
"semver-parser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver-parser"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
|
|
@ -1668,7 +1910,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1696,6 +1938,15 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1da05c97445caa12d05e848c4a4fcbbea29e748ac28f7e80e9b010392063770"
|
||||
dependencies = [
|
||||
"sha1_smol",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1"
|
||||
version = "0.10.6"
|
||||
|
|
@ -1707,6 +1958,12 @@ dependencies = [
|
|||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1_smol"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.9"
|
||||
|
|
@ -1752,6 +2009,12 @@ dependencies = [
|
|||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simd-adler32"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.11"
|
||||
|
|
@ -1857,7 +2120,7 @@ dependencies = [
|
|||
"quote",
|
||||
"sqlx-core",
|
||||
"sqlx-macros-core",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1880,7 +2143,7 @@ dependencies = [
|
|||
"sqlx-mysql",
|
||||
"sqlx-postgres",
|
||||
"sqlx-sqlite",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
"tokio",
|
||||
"url",
|
||||
]
|
||||
|
|
@ -1918,7 +2181,7 @@ dependencies = [
|
|||
"rand 0.8.5",
|
||||
"rsa",
|
||||
"serde",
|
||||
"sha1",
|
||||
"sha1 0.10.6",
|
||||
"sha2",
|
||||
"smallvec",
|
||||
"sqlx-core",
|
||||
|
|
@ -2000,6 +2263,64 @@ version = "1.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
||||
|
||||
[[package]]
|
||||
name = "standback"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff"
|
||||
dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdweb"
|
||||
version = "0.4.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"
|
||||
dependencies = [
|
||||
"discard",
|
||||
"rustc_version",
|
||||
"stdweb-derive",
|
||||
"stdweb-internal-macros",
|
||||
"stdweb-internal-runtime",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdweb-derive"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdweb-internal-macros"
|
||||
version = "0.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11"
|
||||
dependencies = [
|
||||
"base-x",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"sha1 0.6.1",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stdweb-internal-runtime"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
|
||||
|
||||
[[package]]
|
||||
name = "stringprep"
|
||||
version = "0.1.5"
|
||||
|
|
@ -2017,6 +2338,17 @@ version = "2.6.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.107"
|
||||
|
|
@ -2045,7 +2377,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2099,7 +2431,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2111,6 +2443,44 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.2.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242"
|
||||
dependencies = [
|
||||
"const_fn",
|
||||
"libc",
|
||||
"standback",
|
||||
"stdweb",
|
||||
"time-macros",
|
||||
"version_check",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time-macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1"
|
||||
dependencies = [
|
||||
"proc-macro-hack",
|
||||
"time-macros-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time-macros-impl"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd3c141a1b43194f3f56a1411225df8646c55781d5f26db825b3d98507eb482f"
|
||||
dependencies = [
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"standback",
|
||||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinystr"
|
||||
version = "0.8.1"
|
||||
|
|
@ -2161,7 +2531,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2273,7 +2643,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2317,6 +2687,15 @@ version = "0.2.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||
|
||||
[[package]]
|
||||
name = "type1-encoding-parser"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3d6cc09e1a99c7e01f2afe4953789311a1c50baebbdac5b477ecf78e2e92a5b"
|
||||
dependencies = [
|
||||
"pom 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.19.0"
|
||||
|
|
@ -2493,7 +2872,7 @@ dependencies = [
|
|||
"log",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
|
|
@ -2528,7 +2907,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
|
@ -2580,6 +2959,12 @@ dependencies = [
|
|||
"rustls-pki-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "weezl"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3"
|
||||
|
||||
[[package]]
|
||||
name = "whoami"
|
||||
version = "1.6.1"
|
||||
|
|
@ -2590,6 +2975,28 @@ dependencies = [
|
|||
"wasite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.2"
|
||||
|
|
@ -2611,7 +3018,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2622,7 +3029,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2947,7 +3354,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
|
|
@ -2968,7 +3375,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2988,7 +3395,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
|
|
@ -3028,5 +3435,5 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"syn 2.0.107",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -23,3 +23,4 @@ tokio-util = "0.7"
|
|||
futures-util = "0.3"
|
||||
lazy_static = "1.4"
|
||||
bytes = "1.4"
|
||||
pdf-extract = "0.6"
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
use crate::gemini_client::{demo_text_embedding, generate_text_with_model, DEMO_EMBED_DIM};
|
||||
use crate::vector;
|
||||
use crate::vector_db::QdrantClient;
|
||||
use anyhow::Result;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use pdf_extract::extract_text;
|
||||
use sqlx::MySqlPool;
|
||||
use tracing::{error, info};
|
||||
use std::path::PathBuf;
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
pub struct FileWorker {
|
||||
pool: MySqlPool,
|
||||
|
|
@ -72,15 +74,31 @@ impl FileWorker {
|
|||
.fetch_one(&self.pool)
|
||||
.await?;
|
||||
let filename: String = row.get("filename");
|
||||
let _path: String = row.get("path");
|
||||
let path: String = row.get("path");
|
||||
|
||||
let (file_excerpt, truncated) = match extract_file_excerpt(&path).await {
|
||||
Ok(res) => res,
|
||||
Err(err) => {
|
||||
error!(file_id, %filename, %path, error = ?err, "failed to extract text from file; continuing with filename only");
|
||||
(String::new(), false)
|
||||
}
|
||||
};
|
||||
if file_excerpt.is_empty() {
|
||||
warn!(file_id, %filename, %path, "extracted excerpt is empty; prompts may lack context");
|
||||
}
|
||||
|
||||
let excerpt_note = if truncated {
|
||||
"(excerpt truncated for prompt size)"
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
// Stage 1: Gemini 2.5 Flash for description
|
||||
let desc = generate_text_with_model(
|
||||
"gemini-2.5-flash",
|
||||
&format!(
|
||||
"Describe the file '{filename}' and extract all key components, keywords, and details for later vectorization. Be comprehensive and factual."
|
||||
),
|
||||
)
|
||||
let desc_prompt = format!(
|
||||
"You are reviewing the PDF file '{filename}'. Use the following extracted text {excerpt_note} to produce a concise, factual description and key highlights that will help downstream search and reasoning.\n\n--- BEGIN EXCERPT ---\n{}\n--- END EXCERPT ---",
|
||||
file_excerpt
|
||||
);
|
||||
let desc = generate_text_with_model("gemini-2.5-flash", &desc_prompt)
|
||||
.await
|
||||
.unwrap_or_else(|e| format!("[desc error: {}]", e));
|
||||
sqlx::query(
|
||||
|
|
@ -92,12 +110,11 @@ impl FileWorker {
|
|||
.await?;
|
||||
|
||||
// Stage 2: Gemini 2.5 Pro for deep vector graph data
|
||||
let vector_graph = generate_text_with_model(
|
||||
"gemini-2.5-pro",
|
||||
&format!(
|
||||
"Given the file '{filename}' and its description: {desc}\nGenerate a set of vector graph data (keywords, use cases, relationships) that can be used for broad and precise search. Only include what is directly supported by the file."
|
||||
),
|
||||
)
|
||||
let vector_prompt = format!(
|
||||
"You are constructing vector search metadata for the PDF file '{filename}'.\nCurrent description: {desc}\nUse the extracted text {excerpt_note} below to derive precise keywords, thematic clusters, and relationships that are explicitly supported by the content. Provide richly structured bullet points grouped by themes.\n\n--- BEGIN EXCERPT ---\n{}\n--- END EXCERPT ---",
|
||||
file_excerpt
|
||||
);
|
||||
let vector_graph = generate_text_with_model("gemini-2.5-pro", &vector_prompt)
|
||||
.await
|
||||
.unwrap_or_else(|e| format!("[vector error: {}]", e));
|
||||
|
||||
|
|
@ -138,3 +155,72 @@ impl FileWorker {
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Maximum number of characters from the extracted text to include in prompts.
|
||||
const MAX_EXCERPT_CHARS: usize = 4000;
|
||||
|
||||
async fn extract_file_excerpt(path: &str) -> Result<(String, bool)> {
|
||||
let path_buf = PathBuf::from(path);
|
||||
let extension = path_buf
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|s| s.to_ascii_lowercase())
|
||||
.unwrap_or_default();
|
||||
|
||||
let raw_text = if extension == "pdf" {
|
||||
let pdf_path = path_buf.clone();
|
||||
tokio::task::spawn_blocking(move || extract_text(&pdf_path))
|
||||
.await
|
||||
.map_err(|e| anyhow!("pdf text extraction task panicked: {e}"))??
|
||||
} else {
|
||||
let bytes = tokio::fs::read(&path_buf)
|
||||
.await
|
||||
.with_context(|| format!("reading file bytes from {path}"))?;
|
||||
String::from_utf8_lossy(&bytes).into_owned()
|
||||
};
|
||||
|
||||
let cleaned = raw_text.replace('\r', "");
|
||||
let condensed = collapse_whitespace(&cleaned);
|
||||
let (excerpt, truncated) = truncate_to_chars(&condensed, MAX_EXCERPT_CHARS);
|
||||
|
||||
Ok((excerpt, truncated))
|
||||
}
|
||||
|
||||
fn truncate_to_chars(text: &str, max_chars: usize) -> (String, bool) {
|
||||
if max_chars == 0 {
|
||||
return (String::new(), !text.is_empty());
|
||||
}
|
||||
|
||||
let mut result = String::new();
|
||||
let mut chars = text.chars();
|
||||
for _ in 0..max_chars {
|
||||
match chars.next() {
|
||||
Some(ch) => result.push(ch),
|
||||
None => return (result, false),
|
||||
}
|
||||
}
|
||||
|
||||
if chars.next().is_some() {
|
||||
result.push('…');
|
||||
(result, true)
|
||||
} else {
|
||||
(result, false)
|
||||
}
|
||||
}
|
||||
|
||||
fn collapse_whitespace(input: &str) -> String {
|
||||
let mut output = String::with_capacity(input.len());
|
||||
let mut prev_was_ws = false;
|
||||
for ch in input.chars() {
|
||||
if ch.is_whitespace() {
|
||||
if !prev_was_ws {
|
||||
output.push(' ');
|
||||
}
|
||||
prev_was_ws = true;
|
||||
} else {
|
||||
prev_was_ws = false;
|
||||
output.push(ch);
|
||||
}
|
||||
}
|
||||
output.trim().to_string()
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue