| Register | FAQ | Calendar | Search | Today's Posts | Mark Forums Read |
|
#1
|
| I created on home PC a table more than 1 Million rows using example from DB2 COOKBOOK: CREATE TABLE PERSONNEL (EMP# INTEGER NOT NULL ,SOCSEC# CHAR(11) NOT NULL ,JOB_FTN CHAR(4) NOT NULL ,DEPT SMALLINT NOT NULL ,SALARY DECIMAL(7,2) NOT NULL ,DATE_BN DATE NOT NULL WITH DEFAULT ,FST_NAME VARCHAR(20) ,LST_NAME VARCHAR(20) ,CONSTRAINT PEX1 PRIMARY KEY (EMP#) ,CONSTRAINT PE01 CHECK (EMP# > 0) ,CONSTRAINT PE02 CHECK (LOCATE(' ',SOCSEC#) = 0) ,CONSTRAINT PE03 CHECK (LOCATE('-',SOCSEC#,1) = 4) ,CONSTRAINT PE04 CHECK (LOCATE('-',SOCSEC#,5) = 7) ,CONSTRAINT PE05 CHECK (JOB_FTN <> '') ,CONSTRAINT PE06 CHECK (DEPT BETWEEN 1 AND 99) ,CONSTRAINT PE07 CHECK (SALARY BETWEEN 0 AND 99999) ,CONSTRAINT PE08 CHECK (FST_NAME <> '') ,CONSTRAINT PE09 CHECK (LST_NAME <> '') ,CONSTRAINT PE10 CHECK (DATE_BN >= '1900-01-01' )); COMMIT; CREATE UNIQUE INDEX PEX2 ON PERSONNEL (SOCSEC#); CREATE UNIQUE INDEX PEX3 ON PERSONNEL (EMP# ASC); COMMIT; INSERT INTO PERSONNEL WITH TEMP1 (S1,R1,R2,R3,R4) AS (VALUES (0 ,RAND(2) ,RAND()+(RAND()/1E5) ,RAND()* RAND() ,RAND()* RAND()* RAND()) UNION ALL SELECT S1 + 1 ,RAND() ,RAND()+(RAND()/1E5) ,RAND()* RAND() ,RAND()* RAND()* RAND() FROM TEMP1 WHERE S1 < 100000 ) SELECT 1000000 + S1 ,SUBSTR(DIGITS(INT(R2*988+10)),8) || '-' || SUBSTR(DIGITS(INT(R1*88+10)),9) || '-' || TRANSLATE(SUBSTR(DIGITS(S1),7),'9873450126','01234 56789') ,CASE WHEN INT(R4*9) > 7 THEN 'MGR' WHEN INT(R4*9) > 5 THEN 'SUPR' WHEN INT(R4*9) > 3 THEN 'PGMR' WHEN INT(R4*9) > 1 THEN 'SEC' ELSE 'WKR' END ,INT(R3*98+1) ,DECIMAL(R4*99999,7,2) ,DATE('1930-01-01') + INT(50-(R4*50)) YEARS + INT(R4*11) MONTHS + INT(R4*27) DAYS ,CHR(INT(R1*26+65))|| CHR(INT(R2*26+97))|| CHR(INT(R3*26+97))|| CHR(INT(R4*26+97))|| CHR(INT(R3*10+97))|| CHR(INT(R3*11+97)) ,CHR(INT(R2*26+65))|| TRANSLATE(CHAR(INT(R2*1E7)),'aaeeiibmty','01234567 89') FROM TEMP1; Emp# is unique but ssn contains repetable groups of duplicates Is it possible to write a fix query to making SSN also unique Table example: select * from personnel order by 2 fetch first 1000 rows only; MP# SOCSEC# JOB_FTN DEPT SALARY DATE_BN FST_NAME LST_NAME ----------- ----------- ------- ------ --------- ---------- ------------------ -- -------------------- 1043994 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1143995 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1243996 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1343997 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1443998 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1543999 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1644000 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1744001 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1844002 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1944003 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc Aeaet 1092444 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1192445 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1292446 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1392447 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1492448 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1592449 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1692450 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1792451 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1892452 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1992453 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa Ayeae 1041914 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1141915 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1241916 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1341917 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1441918 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1541919 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1641920 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat 1741921 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef Aytat .................................................. ........................ .................................................. .... Thank's in advance Leny G. -- Message posted via http://www.dbmonster.com |
|
#2
|
| On Sep 7, 5:53 pm, "lenygold via DBMonster.com" > I created on home PC a table more than 1 Million rows using example from DB2 > COOKBOOK: > > CREATE TABLE PERSONNEL > (EMP# INTEGER NOT NULL > ,SOCSEC# CHAR(11) NOT NULL > ,JOB_FTN CHAR(4) NOT NULL > ,DEPT SMALLINT NOT NULL > ,SALARY DECIMAL(7,2) NOT NULL > ,DATE_BN DATE NOT NULL WITH DEFAULT > ,FST_NAME VARCHAR(20) > ,LST_NAME VARCHAR(20) > ,CONSTRAINT PEX1 PRIMARY KEY (EMP#) > ,CONSTRAINT PE01 CHECK (EMP# > 0) > ,CONSTRAINT PE02 CHECK (LOCATE(' ',SOCSEC#) = 0) > ,CONSTRAINT PE03 CHECK (LOCATE('-',SOCSEC#,1) = 4) > ,CONSTRAINT PE04 CHECK (LOCATE('-',SOCSEC#,5) = 7) > ,CONSTRAINT PE05 CHECK (JOB_FTN <> '') > ,CONSTRAINT PE06 CHECK (DEPT BETWEEN 1 AND 99) > ,CONSTRAINT PE07 CHECK (SALARY BETWEEN 0 AND 99999) > ,CONSTRAINT PE08 CHECK (FST_NAME <> '') > ,CONSTRAINT PE09 CHECK (LST_NAME <> '') > ,CONSTRAINT PE10 CHECK (DATE_BN >= '1900-01-01' )); > COMMIT; > CREATE UNIQUE INDEX PEX2 ON PERSONNEL (SOCSEC#); > CREATE UNIQUE INDEX PEX3 ON PERSONNEL (EMP# ASC); > COMMIT; > > INSERT INTO PERSONNEL > WITH TEMP1 (S1,R1,R2,R3,R4) AS > (VALUES (0 > ,RAND(2) > ,RAND()+(RAND()/1E5) > ,RAND()* RAND() > ,RAND()* RAND()* RAND()) > UNION ALL > SELECT S1 + 1 > ,RAND() > ,RAND()+(RAND()/1E5) > ,RAND()* RAND() > ,RAND()* RAND()* RAND() > FROM TEMP1 > WHERE S1 < 100000 > ) > SELECT 1000000 + S1 > ,SUBSTR(DIGITS(INT(R2*988+10)),8) || '-' || > SUBSTR(DIGITS(INT(R1*88+10)),9) || '-' || > TRANSLATE(SUBSTR(DIGITS(S1),7),'9873450126','01234 56789') > ,CASE > WHEN INT(R4*9) > 7 THEN 'MGR' > WHEN INT(R4*9) > 5 THEN 'SUPR' > WHEN INT(R4*9) > 3 THEN 'PGMR' > WHEN INT(R4*9) > 1 THEN 'SEC' > ELSE 'WKR' > END > ,INT(R3*98+1) > ,DECIMAL(R4*99999,7,2) > ,DATE('1930-01-01') + INT(50-(R4*50)) YEARS > + INT(R4*11) MONTHS > + INT(R4*27) DAYS > ,CHR(INT(R1*26+65))|| CHR(INT(R2*26+97))|| CHR(INT(R3*26+97))|| > CHR(INT(R4*26+97))|| CHR(INT(R3*10+97))|| CHR(INT(R3*11+97)) > ,CHR(INT(R2*26+65))|| > TRANSLATE(CHAR(INT(R2*1E7)),'aaeeiibmty','01234567 89') > FROM TEMP1; > > Emp# is unique but ssn contains repetable groups of duplicates > Is it possible to write a fix query to making SSN also unique > > Table example: > select * from personnel > order by 2 > fetch first 1000 rows only; > > MP# SOCSEC# JOB_FTN DEPT SALARY DATE_BN FST_NAME > LST_NAME > ----------- ----------- ------- ------ --------- ---------- ------------------ > -- -------------------- > 1043994 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1143995 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1243996 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1343997 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1443998 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1543999 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1644000 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1744001 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1844002 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1944003 010-10-3664 WKR 22 48.93 01/01/1979 Aafacc > Aeaet > 1092444 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1192445 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1292446 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1392447 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1492448 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1592449 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1692450 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1792451 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1892452 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1992453 010-11-7444 WKR 2 11350.86 02/04/1974 Aaacaa > Ayeae > 1041914 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1141915 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1241916 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1341917 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1441918 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1541919 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1641920 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > 1741921 010-12-8684 WKR 49 2935.20 01/01/1978 Aamaef > Aytat > .................................................. ........................ > .................................................. ... > > Thank's in advance Leny G. > > -- > Message posted viahttp://www.dbmonster.com Lenny, Here's a common approach to deleting dups: DELETE FROM ( SELECT ROW_NUMBER() OVER (PARTITION BY EMP#) RN FROM PERSONNEL ) T WHERE T.RN > 1; Also, I applaud your diligence with CHECK constraints, but think you could probably combine constraints PE02 through PE04 by simply ANDing the conditions (but that would, admittedly, give you a less precise idea of what the issue was in the event the constraint was violated). BTW, if it turns out that you'll end up deleting more rows than you're keeping, you may want to consider INSERTing the "keeper" rows into a temporary/staging table and simply truncating the permanent table and re-INSERTING into it. Something like: INSERT INTO PERSONNEL_TEMP SELECT T.* FROM ( SELECT ROW_NUMBER() OVER (PARTITION BY EMP#) RN, EMP#, SOCSEC#, JOB_FTN, DEPT, SALARY, DATE_BN, FST_NAME, LST_NAME FROM PERSONNEL P ) T WHERE T.RN = 1; ALTER TABLE PERSONNEL ACTIVATE NOT LOGGED INITIALLY WITH EMPTY TABLE; INSERT INTO PERSONNEL SELECT * FROM PERSONNEL_TEMP; You can also do the truncating by loading the table from the null device. Regards |
|
#3
|
| Thank you jefftyzzer for your help. I don't have any problem with delteting dups. But what i am looking for is one UPDATE STATEMENT to fix SSN and making them Unique. This is test table and i don't care about SSN value. What do you think about Generate Unique? Thank's in advance Leny G. jefftyzzer wrote: >> I created on home PC a table more than 1 Million rows using example from DB2 >> COOKBOOK: >[quoted text clipped - 137 lines] >> -- >> Message posted viahttp://www.dbmonster.com > >Lenny, > >Here's a common approach to deleting dups: > >DELETE FROM > ( > SELECT > ROW_NUMBER() OVER (PARTITION BY EMP#) RN > FROM > PERSONNEL > ) T >WHERE > T.RN > 1; > >Also, I applaud your diligence with CHECK constraints, but think you >could probably combine constraints PE02 through PE04 by simply ANDing >the conditions (but that would, admittedly, give you a less precise >idea of what the issue was in the event the constraint was violated). > >BTW, if it turns out that you'll end up deleting more rows than you're >keeping, you may want to consider INSERTing the "keeper" rows into a >temporary/staging table and simply truncating the permanent table and >re-INSERTING into it. Something like: > >INSERT INTO PERSONNEL_TEMP >SELECT > T.* >FROM > ( > SELECT > ROW_NUMBER() OVER (PARTITION BY EMP#) RN, > EMP#, > SOCSEC#, > JOB_FTN, > DEPT, > SALARY, > DATE_BN, > FST_NAME, > LST_NAME > FROM > PERSONNEL P > ) T >WHERE > T.RN = 1; > >ALTER TABLE PERSONNEL ACTIVATE NOT LOGGED INITIALLY WITH EMPTY TABLE; > >INSERT INTO > PERSONNEL >SELECT > * >FROM > PERSONNEL_TEMP; > >You can also do the truncating by loading the table from the null >device. > >Regards -- Message posted via http://www.dbmonster.com |
|
#4
|
| On Sep 8, 4:33*am, "lenygold via DBMonster.com" > Thank you jefftyzzer for your help. > I don't have any problem with delteting dups. > But what i am looking for is one UPDATE STATEMENT to fix SSN and making them > Unique. > This is test table and i don't care about SSN value. What do you think about > Generate Unique? > Thank's in advance Leny G. > > > > jefftyzzer wrote: > >> I created on home PC a table more than 1 Million rows using example from DB2 > >> COOKBOOK: > >[quoted text clipped - 137 lines] > >> -- > >> Message posted viahttp://www.dbmonster.com > > >Lenny, > > >Here's a common approach to deleting dups: > > >DELETE FROM > > * *( > > * *SELECT > > * * * * * *ROW_NUMBER() OVER (PARTITION BY EMP#) RN > > * *FROM > > * * * * * *PERSONNEL > > * *) T > >WHERE > > * *T.RN > 1; > > >Also, I applaud your diligence with CHECK constraints, but think you > >could probably combine constraints PE02 through PE04 by simply ANDing > >the conditions (but that would, admittedly, give you a less precise > >idea of what the issue was in the event the constraint was violated). > > >BTW, if it turns out that you'll end up deleting more rows than you're > >keeping, you may want to consider INSERTing the "keeper" rows into a > >temporary/staging table and simply truncating the permanent table and > >re-INSERTING into it. Something like: > > >INSERT INTO PERSONNEL_TEMP > >SELECT > > * *T.* > >FROM > > * *( > > * *SELECT > > * * * * * *ROW_NUMBER() OVER (PARTITION BY EMP#) RN, > > * * * * * *EMP#, > > * * * * * *SOCSEC#, > > * * * * * *JOB_FTN, > > * * * * * *DEPT, > > * * * * * *SALARY, > > * * * * * *DATE_BN, > > * * * * * *FST_NAME, > > * * * * * *LST_NAME > > * *FROM > > * * * * * *PERSONNEL P > > * *) T > >WHERE > > * *T.RN = 1; > > >ALTER TABLE PERSONNEL ACTIVATE NOT LOGGED INITIALLY WITH EMPTY TABLE; > > >INSERT INTO > > * *PERSONNEL > >SELECT > > * ** > >FROM > > * *PERSONNEL_TEMP; > > >You can also do the truncating by loading the table from the null > >device. > > >Regards > > -- > Message posted viahttp://www.dbmonster.com Leny, Sure--you could certainly start with the output of generate_unique(), but you'd have to eliminate four digits from its ouput--and slide the dashes into the appropriate spots--to fit it into your field. So, I suppose there's the possibility that the four digits you eliminate are the ones that made the number unique, and thus you'd have to handle collisions in your insertion scheme. Regards, --Jeff |
![]() |
| Thread Tools | |
| Display Modes | |